[llvm] [TargetLowering][RISCV] Avoid ISD::MUL in expandCLMUL if hasBitTest or MUL requires a library call. (PR #182389)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 20 14:20:41 PST 2026
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/182389
>From c370d6b04f6bd07b6dd16cd46bfe672753957399 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 19 Feb 2026 14:31:36 -0800
Subject: [PATCH 1/2] [TargetLowering][RISCV] Don't use ISD::MUL in expandCLMUL
if it would generate a library call.
Scalar multiply is not part of the most basic RISC-V ISA. Use an
and+setcc+shl+select sequence for these targets.
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 18 +-
llvm/test/CodeGen/RISCV/clmul.ll | 15898 +++++++++-------
2 files changed, 9538 insertions(+), 6378 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7d0f8c632ba18..62e58f18ff024 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8468,12 +8468,26 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
// NOTE: If you change this expansion, please update the cost model
// calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
// Intrinsic::clmul.
+
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
SDValue Res = DAG.getConstant(0, DL, VT);
for (unsigned I = 0; I < BW; ++I) {
SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
- SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
- Res = DAG.getNode(ISD::XOR, DL, VT, Res, Mul);
+ SDValue Part;
+ if (isOperationLegalOrCustom(
+ ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
+ Part = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
+ } else {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Cond = DAG.getSetCC(DL, SetCCVT, YMasked, Zero, ISD::SETNE);
+ SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X,
+ DAG.getShiftAmountConstant(I, VT, DL));
+ Part = DAG.getSelect(DL, VT, Cond, XShifted, Zero);
+ }
+ Res = DAG.getNode(ISD::XOR, DL, VT, Res, Part);
}
return Res;
}
diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll
index 1c5a33e7b4377..7ad597892d615 100644
--- a/llvm/test/CodeGen/RISCV/clmul.ll
+++ b/llvm/test/CodeGen/RISCV/clmul.ll
@@ -9,70 +9,52 @@
define i4 @clmul_i4(i4 %a, i4 %b) nounwind {
; RV32I-LABEL: clmul_i4:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s0, 1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s0, 4
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s0, 8
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: slli a2, a1, 30
+; RV32I-NEXT: slli a3, a0, 1
+; RV32I-NEXT: slli a4, a1, 29
+; RV32I-NEXT: srli a2, a2, 31
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: slli a3, a0, 2
+; RV32I-NEXT: srli a4, a4, 31
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: and a3, a4, a3
+; RV32I-NEXT: slli a4, a1, 31
+; RV32I-NEXT: slli a1, a1, 28
+; RV32I-NEXT: srai a4, a4, 31
+; RV32I-NEXT: and a4, a4, a0
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: srli a1, a1, 31
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: xor a2, a4, a2
+; RV32I-NEXT: xor a0, a3, a0
+; RV32I-NEXT: xor a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: clmul_i4:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a1
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s0, 1
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s0, 4
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s0, 8
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: slli a2, a1, 62
+; RV64I-NEXT: slli a3, a0, 1
+; RV64I-NEXT: slli a4, a1, 61
+; RV64I-NEXT: srli a2, a2, 63
+; RV64I-NEXT: neg a2, a2
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: slli a3, a0, 2
+; RV64I-NEXT: srli a4, a4, 63
+; RV64I-NEXT: neg a4, a4
+; RV64I-NEXT: and a3, a4, a3
+; RV64I-NEXT: slli a4, a1, 63
+; RV64I-NEXT: slli a1, a1, 60
+; RV64I-NEXT: srai a4, a4, 63
+; RV64I-NEXT: and a4, a4, a0
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: srli a1, a1, 63
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: xor a2, a4, a2
+; RV64I-NEXT: xor a0, a3, a0
+; RV64I-NEXT: xor a0, a2, a0
; RV64I-NEXT: ret
;
; CHECK-M-LABEL: clmul_i4:
@@ -111,104 +93,100 @@ define i4 @clmul_i4(i4 %a, i4 %b) nounwind {
define i8 @clmul_i8(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: clmul_i8:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s2, s3, a0
-; RV32I-NEXT: andi a1, s1, -128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: slli a2, a1, 30
+; RV32I-NEXT: slli a3, a0, 1
+; RV32I-NEXT: slli a4, a1, 29
+; RV32I-NEXT: slli a5, a0, 2
+; RV32I-NEXT: slli a6, a1, 28
+; RV32I-NEXT: slli a7, a0, 3
+; RV32I-NEXT: slli t0, a1, 27
+; RV32I-NEXT: srli a2, a2, 31
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: slli a3, a0, 4
+; RV32I-NEXT: srli a4, a4, 31
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: slli a5, a1, 26
+; RV32I-NEXT: srli a6, a6, 31
+; RV32I-NEXT: neg a6, a6
+; RV32I-NEXT: and a6, a6, a7
+; RV32I-NEXT: slli a7, a0, 5
+; RV32I-NEXT: srli t0, t0, 31
+; RV32I-NEXT: neg t0, t0
+; RV32I-NEXT: and a3, t0, a3
+; RV32I-NEXT: slli t0, a1, 25
+; RV32I-NEXT: srli a5, a5, 31
+; RV32I-NEXT: neg a5, a5
+; RV32I-NEXT: and a5, a5, a7
+; RV32I-NEXT: slli a7, a0, 6
+; RV32I-NEXT: srli t0, t0, 31
+; RV32I-NEXT: neg t0, t0
+; RV32I-NEXT: and a7, t0, a7
+; RV32I-NEXT: slli t0, a1, 31
+; RV32I-NEXT: srai t0, t0, 31
+; RV32I-NEXT: and t0, t0, a0
+; RV32I-NEXT: xor a2, t0, a2
+; RV32I-NEXT: xor a4, a4, a6
+; RV32I-NEXT: xor a3, a3, a5
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: slli a0, a0, 7
+; RV32I-NEXT: srli a1, a1, 31
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: xor a2, a2, a4
+; RV32I-NEXT: xor a3, a3, a7
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: xor a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: clmul_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 16
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 32
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 64
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: andi a1, s1, -128
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: slli a2, a1, 62
+; RV64I-NEXT: slli a3, a0, 1
+; RV64I-NEXT: slli a4, a1, 61
+; RV64I-NEXT: slli a5, a0, 2
+; RV64I-NEXT: slli a6, a1, 60
+; RV64I-NEXT: slli a7, a0, 3
+; RV64I-NEXT: slli t0, a1, 59
+; RV64I-NEXT: srli a2, a2, 63
+; RV64I-NEXT: neg a2, a2
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: slli a3, a0, 4
+; RV64I-NEXT: srli a4, a4, 63
+; RV64I-NEXT: neg a4, a4
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 58
+; RV64I-NEXT: srli a6, a6, 63
+; RV64I-NEXT: neg a6, a6
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: slli a7, a0, 5
+; RV64I-NEXT: srli t0, t0, 63
+; RV64I-NEXT: neg t0, t0
+; RV64I-NEXT: and a3, t0, a3
+; RV64I-NEXT: slli t0, a1, 57
+; RV64I-NEXT: srli a5, a5, 63
+; RV64I-NEXT: neg a5, a5
+; RV64I-NEXT: and a5, a5, a7
+; RV64I-NEXT: slli a7, a0, 6
+; RV64I-NEXT: srli t0, t0, 63
+; RV64I-NEXT: neg t0, t0
+; RV64I-NEXT: and a7, t0, a7
+; RV64I-NEXT: slli t0, a1, 63
+; RV64I-NEXT: srai t0, t0, 63
+; RV64I-NEXT: and t0, t0, a0
+; RV64I-NEXT: xor a2, t0, a2
+; RV64I-NEXT: xor a4, a4, a6
+; RV64I-NEXT: xor a3, a3, a5
+; RV64I-NEXT: slli a1, a1, 56
+; RV64I-NEXT: slli a0, a0, 7
+; RV64I-NEXT: srli a1, a1, 63
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: xor a2, a2, a4
+; RV64I-NEXT: xor a3, a3, a7
+; RV64I-NEXT: xor a2, a2, a3
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: xor a0, a2, a0
; RV64I-NEXT: ret
;
; CHECK-M-LABEL: clmul_i8:
@@ -272,184 +250,218 @@ define i16 @clmul_i16(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: clmul_i16:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 1
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 2
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: lui a1, 4
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 1048568
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, a1, 30
+; RV32I-NEXT: slli a3, a0, 1
+; RV32I-NEXT: slli a4, a1, 29
+; RV32I-NEXT: slli a5, a0, 2
+; RV32I-NEXT: slli a6, a1, 28
+; RV32I-NEXT: slli a7, a0, 3
+; RV32I-NEXT: slli t0, a1, 27
+; RV32I-NEXT: slli t1, a0, 4
+; RV32I-NEXT: slli t2, a1, 26
+; RV32I-NEXT: slli t3, a0, 5
+; RV32I-NEXT: slli t4, a1, 25
+; RV32I-NEXT: slli t5, a0, 6
+; RV32I-NEXT: slli t6, a1, 24
+; RV32I-NEXT: slli s0, a0, 7
+; RV32I-NEXT: slli s1, a1, 23
+; RV32I-NEXT: srli a2, a2, 31
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: slli s2, a0, 8
+; RV32I-NEXT: srli a4, a4, 31
+; RV32I-NEXT: neg a3, a4
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: slli s3, a1, 22
+; RV32I-NEXT: srli a4, a6, 31
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: and a4, a4, a7
+; RV32I-NEXT: slli s4, a0, 9
+; RV32I-NEXT: srli a5, t0, 31
+; RV32I-NEXT: neg a5, a5
+; RV32I-NEXT: and a5, a5, t1
+; RV32I-NEXT: slli t0, a1, 21
+; RV32I-NEXT: srli a6, t2, 31
+; RV32I-NEXT: neg a6, a6
+; RV32I-NEXT: and a6, a6, t3
+; RV32I-NEXT: slli t1, a0, 10
+; RV32I-NEXT: srli a7, t4, 31
+; RV32I-NEXT: neg a7, a7
+; RV32I-NEXT: and a7, a7, t5
+; RV32I-NEXT: slli t2, a1, 20
+; RV32I-NEXT: srli t3, t6, 31
+; RV32I-NEXT: neg t3, t3
+; RV32I-NEXT: and t3, t3, s0
+; RV32I-NEXT: slli t4, a0, 11
+; RV32I-NEXT: srli s1, s1, 31
+; RV32I-NEXT: neg t5, s1
+; RV32I-NEXT: and t5, t5, s2
+; RV32I-NEXT: slli t6, a1, 19
+; RV32I-NEXT: srli s0, s3, 31
+; RV32I-NEXT: neg s0, s0
+; RV32I-NEXT: and s0, s0, s4
+; RV32I-NEXT: slli s1, a0, 12
+; RV32I-NEXT: srli t0, t0, 31
+; RV32I-NEXT: neg t0, t0
+; RV32I-NEXT: and t0, t0, t1
+; RV32I-NEXT: slli t1, a1, 18
+; RV32I-NEXT: srli t2, t2, 31
+; RV32I-NEXT: neg t2, t2
+; RV32I-NEXT: and t2, t2, t4
+; RV32I-NEXT: slli t4, a0, 13
+; RV32I-NEXT: srli t6, t6, 31
+; RV32I-NEXT: neg t6, t6
+; RV32I-NEXT: and t6, t6, s1
+; RV32I-NEXT: slli s1, a1, 17
+; RV32I-NEXT: srli t1, t1, 31
+; RV32I-NEXT: neg t1, t1
+; RV32I-NEXT: and t1, t1, t4
+; RV32I-NEXT: slli t4, a0, 14
+; RV32I-NEXT: srli s1, s1, 31
+; RV32I-NEXT: neg s1, s1
+; RV32I-NEXT: and t4, s1, t4
+; RV32I-NEXT: slli s1, a1, 31
+; RV32I-NEXT: slli a1, a1, 16
+; RV32I-NEXT: srai s1, s1, 31
+; RV32I-NEXT: and s1, s1, a0
+; RV32I-NEXT: slli a0, a0, 15
+; RV32I-NEXT: srli a1, a1, 31
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: xor a2, s1, a2
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: xor a1, a5, a6
+; RV32I-NEXT: xor a4, t3, t5
+; RV32I-NEXT: xor a5, t2, t6
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: xor a1, a1, a7
+; RV32I-NEXT: xor a4, a4, s0
+; RV32I-NEXT: xor a3, a5, t1
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: xor a2, a4, t0
+; RV32I-NEXT: xor a3, a3, t4
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor a0, a3, a0
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: clmul_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 16
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 32
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 64
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 128
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 256
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 512
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 1024
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: li a1, 1
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: slli a1, a1, 11
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 1048568
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sd s0, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, a1, 62
+; RV64I-NEXT: slli a3, a0, 1
+; RV64I-NEXT: slli a4, a1, 61
+; RV64I-NEXT: slli a5, a0, 2
+; RV64I-NEXT: slli a6, a1, 60
+; RV64I-NEXT: slli a7, a0, 3
+; RV64I-NEXT: slli t0, a1, 59
+; RV64I-NEXT: slli t1, a0, 4
+; RV64I-NEXT: slli t2, a1, 58
+; RV64I-NEXT: slli t3, a0, 5
+; RV64I-NEXT: slli t4, a1, 57
+; RV64I-NEXT: slli t5, a0, 6
+; RV64I-NEXT: slli t6, a1, 56
+; RV64I-NEXT: slli s0, a0, 7
+; RV64I-NEXT: slli s1, a1, 55
+; RV64I-NEXT: srli a2, a2, 63
+; RV64I-NEXT: neg a2, a2
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: slli s2, a0, 8
+; RV64I-NEXT: srli a4, a4, 63
+; RV64I-NEXT: neg a3, a4
+; RV64I-NEXT: and a3, a3, a5
+; RV64I-NEXT: slli s3, a1, 54
+; RV64I-NEXT: srli a4, a6, 63
+; RV64I-NEXT: neg a4, a4
+; RV64I-NEXT: and a4, a4, a7
+; RV64I-NEXT: slli s4, a0, 9
+; RV64I-NEXT: srli a5, t0, 63
+; RV64I-NEXT: neg a5, a5
+; RV64I-NEXT: and a5, a5, t1
+; RV64I-NEXT: slli t0, a1, 53
+; RV64I-NEXT: srli a6, t2, 63
+; RV64I-NEXT: neg a6, a6
+; RV64I-NEXT: and a6, a6, t3
+; RV64I-NEXT: slli t1, a0, 10
+; RV64I-NEXT: srli a7, t4, 63
+; RV64I-NEXT: neg a7, a7
+; RV64I-NEXT: and a7, a7, t5
+; RV64I-NEXT: slli t2, a1, 52
+; RV64I-NEXT: srli t3, t6, 63
+; RV64I-NEXT: neg t3, t3
+; RV64I-NEXT: and t3, t3, s0
+; RV64I-NEXT: slli t4, a0, 11
+; RV64I-NEXT: srli s1, s1, 63
+; RV64I-NEXT: neg t5, s1
+; RV64I-NEXT: and t5, t5, s2
+; RV64I-NEXT: slli t6, a1, 51
+; RV64I-NEXT: srli s0, s3, 63
+; RV64I-NEXT: neg s0, s0
+; RV64I-NEXT: and s0, s0, s4
+; RV64I-NEXT: slli s1, a0, 12
+; RV64I-NEXT: srli t0, t0, 63
+; RV64I-NEXT: neg t0, t0
+; RV64I-NEXT: and t0, t0, t1
+; RV64I-NEXT: slli t1, a1, 50
+; RV64I-NEXT: srli t2, t2, 63
+; RV64I-NEXT: neg t2, t2
+; RV64I-NEXT: and t2, t2, t4
+; RV64I-NEXT: slli t4, a0, 13
+; RV64I-NEXT: srli t6, t6, 63
+; RV64I-NEXT: neg t6, t6
+; RV64I-NEXT: and t6, t6, s1
+; RV64I-NEXT: slli s1, a1, 49
+; RV64I-NEXT: srli t1, t1, 63
+; RV64I-NEXT: neg t1, t1
+; RV64I-NEXT: and t1, t1, t4
+; RV64I-NEXT: slli t4, a0, 14
+; RV64I-NEXT: srli s1, s1, 63
+; RV64I-NEXT: neg s1, s1
+; RV64I-NEXT: and t4, s1, t4
+; RV64I-NEXT: slli s1, a1, 63
+; RV64I-NEXT: slli a1, a1, 48
+; RV64I-NEXT: srai s1, s1, 63
+; RV64I-NEXT: and s1, s1, a0
+; RV64I-NEXT: slli a0, a0, 15
+; RV64I-NEXT: srli a1, a1, 63
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: xor a2, s1, a2
+; RV64I-NEXT: xor a3, a3, a4
+; RV64I-NEXT: xor a1, a5, a6
+; RV64I-NEXT: xor a4, t3, t5
+; RV64I-NEXT: xor a5, t2, t6
+; RV64I-NEXT: xor a2, a2, a3
+; RV64I-NEXT: xor a1, a1, a7
+; RV64I-NEXT: xor a4, a4, s0
+; RV64I-NEXT: xor a3, a5, t1
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: xor a2, a4, t0
+; RV64I-NEXT: xor a3, a3, t4
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: xor a0, a3, a0
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
@@ -572,350 +584,498 @@ define i16 @clmul_i16(i16 %a, i16 %b) nounwind {
define i32 @clmul_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: clmul_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 1
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 2
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: lui a1, 4
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 8
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 16
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 32
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 64
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: lui a1, 128
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 256
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 512
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 1024
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 2048
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 4096
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: lui a1, 8192
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 16384
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 32768
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 65536
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 131072
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 262144
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 524288
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: addi sp, sp, -80
+; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli t0, a0, 1
+; RV32I-NEXT: andi s1, a1, 2
+; RV32I-NEXT: slli t2, a0, 2
+; RV32I-NEXT: andi s4, a1, 4
+; RV32I-NEXT: slli t5, a0, 3
+; RV32I-NEXT: andi s9, a1, 8
+; RV32I-NEXT: slli s0, a0, 4
+; RV32I-NEXT: andi ra, a1, 16
+; RV32I-NEXT: slli s2, a0, 5
+; RV32I-NEXT: andi s11, a1, 32
+; RV32I-NEXT: slli a2, a0, 6
+; RV32I-NEXT: andi s10, a1, 64
+; RV32I-NEXT: slli s7, a0, 7
+; RV32I-NEXT: andi s8, a1, 128
+; RV32I-NEXT: slli s5, a0, 8
+; RV32I-NEXT: andi s6, a1, 256
+; RV32I-NEXT: slli a6, a0, 9
+; RV32I-NEXT: andi s3, a1, 512
+; RV32I-NEXT: slli a3, a0, 10
+; RV32I-NEXT: andi a5, a1, 1024
+; RV32I-NEXT: li a4, 1
+; RV32I-NEXT: lui a7, 1
+; RV32I-NEXT: lui t3, 4
+; RV32I-NEXT: lui t1, 8
+; RV32I-NEXT: lui t6, 16
+; RV32I-NEXT: seqz s1, s1
+; RV32I-NEXT: addi s1, s1, -1
+; RV32I-NEXT: and t0, s1, t0
+; RV32I-NEXT: sw t0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s1, 32
+; RV32I-NEXT: seqz s4, s4
+; RV32I-NEXT: addi s4, s4, -1
+; RV32I-NEXT: and t4, s4, t2
+; RV32I-NEXT: lui s4, 64
+; RV32I-NEXT: seqz s9, s9
+; RV32I-NEXT: addi s9, s9, -1
+; RV32I-NEXT: and t5, s9, t5
+; RV32I-NEXT: lui s9, 128
+; RV32I-NEXT: seqz ra, ra
+; RV32I-NEXT: addi ra, ra, -1
+; RV32I-NEXT: and s0, ra, s0
+; RV32I-NEXT: lui ra, 256
+; RV32I-NEXT: seqz s11, s11
+; RV32I-NEXT: addi s11, s11, -1
+; RV32I-NEXT: and s2, s11, s2
+; RV32I-NEXT: lui s11, 512
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: and a2, s10, a2
+; RV32I-NEXT: sw a2, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s10, 1024
+; RV32I-NEXT: seqz s8, s8
+; RV32I-NEXT: addi s8, s8, -1
+; RV32I-NEXT: and s7, s8, s7
+; RV32I-NEXT: lui s8, 2048
+; RV32I-NEXT: seqz s6, s6
+; RV32I-NEXT: addi s6, s6, -1
+; RV32I-NEXT: and s5, s6, s5
+; RV32I-NEXT: lui s6, 4096
+; RV32I-NEXT: seqz s3, s3
+; RV32I-NEXT: addi s3, s3, -1
+; RV32I-NEXT: and a2, s3, a6
+; RV32I-NEXT: sw a2, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s3, a0, 31
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: and a3, a5, a3
+; RV32I-NEXT: sw a3, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a3, a1, 31
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: and a2, a3, s3
+; RV32I-NEXT: sw a2, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s3, 8192
+; RV32I-NEXT: slli a4, a4, 11
+; RV32I-NEXT: and a7, a1, a7
+; RV32I-NEXT: lui a2, 2
+; RV32I-NEXT: and a2, a1, a2
+; RV32I-NEXT: and a3, a1, t3
+; RV32I-NEXT: and a5, a1, t1
+; RV32I-NEXT: and a6, a1, t6
+; RV32I-NEXT: and s1, a1, s1
+; RV32I-NEXT: and t0, a1, s4
+; RV32I-NEXT: and t1, a1, s9
+; RV32I-NEXT: and ra, a1, ra
+; RV32I-NEXT: and s11, a1, s11
+; RV32I-NEXT: and s10, a1, s10
+; RV32I-NEXT: and s9, a1, s8
+; RV32I-NEXT: and s8, a1, s6
+; RV32I-NEXT: and s4, a1, s3
+; RV32I-NEXT: lui t2, 16384
+; RV32I-NEXT: and s3, a1, t2
+; RV32I-NEXT: lui t2, 32768
+; RV32I-NEXT: and t2, a1, t2
+; RV32I-NEXT: lui t3, 65536
+; RV32I-NEXT: and t6, a1, t3
+; RV32I-NEXT: lui t3, 131072
+; RV32I-NEXT: and t3, a1, t3
+; RV32I-NEXT: lui s6, 262144
+; RV32I-NEXT: and s6, a1, s6
+; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a1, a4
+; RV32I-NEXT: sw a4, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a0
+; RV32I-NEXT: lw a4, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a1, a4
+; RV32I-NEXT: xor a1, t4, t5
+; RV32I-NEXT: xor t4, s0, s2
+; RV32I-NEXT: xor t5, s7, s5
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: slli s0, a0, 12
+; RV32I-NEXT: and a7, a7, s0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli s0, a0, 13
+; RV32I-NEXT: and a2, a2, s0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli s0, a0, 14
+; RV32I-NEXT: and s0, a3, s0
+; RV32I-NEXT: seqz a3, a5
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a0, 15
+; RV32I-NEXT: and s7, a3, a5
+; RV32I-NEXT: seqz a3, a6
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a0, 16
+; RV32I-NEXT: and s6, a3, a5
+; RV32I-NEXT: seqz a3, s1
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a0, 17
+; RV32I-NEXT: and s2, a3, a5
+; RV32I-NEXT: seqz a3, t0
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a0, 18
+; RV32I-NEXT: and s5, a3, a5
+; RV32I-NEXT: seqz a3, t1
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a0, 19
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: seqz a5, ra
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a0, 20
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: seqz a6, s11
+; RV32I-NEXT: addi a6, a6, -1
+; RV32I-NEXT: slli t0, a0, 21
+; RV32I-NEXT: and a6, a6, t0
+; RV32I-NEXT: seqz t0, s10
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: slli t1, a0, 22
+; RV32I-NEXT: and t0, t0, t1
+; RV32I-NEXT: seqz t1, s9
+; RV32I-NEXT: addi t1, t1, -1
+; RV32I-NEXT: slli s1, a0, 23
+; RV32I-NEXT: and t1, t1, s1
+; RV32I-NEXT: seqz s1, s8
+; RV32I-NEXT: addi s1, s1, -1
+; RV32I-NEXT: slli s8, a0, 24
+; RV32I-NEXT: and s1, s1, s8
+; RV32I-NEXT: seqz s4, s4
+; RV32I-NEXT: addi s4, s4, -1
+; RV32I-NEXT: slli s8, a0, 25
+; RV32I-NEXT: and s4, s4, s8
+; RV32I-NEXT: seqz s3, s3
+; RV32I-NEXT: addi s3, s3, -1
+; RV32I-NEXT: slli s8, a0, 26
+; RV32I-NEXT: and s3, s3, s8
+; RV32I-NEXT: seqz t2, t2
+; RV32I-NEXT: addi t2, t2, -1
+; RV32I-NEXT: slli s8, a0, 27
+; RV32I-NEXT: and t2, t2, s8
+; RV32I-NEXT: seqz t6, t6
+; RV32I-NEXT: addi t6, t6, -1
+; RV32I-NEXT: slli s8, a0, 28
+; RV32I-NEXT: and t6, t6, s8
+; RV32I-NEXT: seqz t3, t3
+; RV32I-NEXT: addi t3, t3, -1
+; RV32I-NEXT: slli s8, a0, 29
+; RV32I-NEXT: and t3, t3, s8
+; RV32I-NEXT: lw s8, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: seqz s8, s8
+; RV32I-NEXT: addi s8, s8, -1
+; RV32I-NEXT: slli s9, a0, 30
+; RV32I-NEXT: and s8, s8, s9
+; RV32I-NEXT: xor a1, a4, a1
+; RV32I-NEXT: lw a4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, t4, a4
+; RV32I-NEXT: lw t4, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t5, t4
+; RV32I-NEXT: slli a0, a0, 11
+; RV32I-NEXT: lw t5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: seqz t5, t5
+; RV32I-NEXT: addi t5, t5, -1
+; RV32I-NEXT: and a0, t5, a0
+; RV32I-NEXT: xor t5, s0, s7
+; RV32I-NEXT: xor a3, a3, a5
+; RV32I-NEXT: xor a5, s4, s3
+; RV32I-NEXT: xor a1, a1, a4
+; RV32I-NEXT: lw a4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, t4, a4
+; RV32I-NEXT: xor a0, a0, a7
+; RV32I-NEXT: xor a7, t5, s6
+; RV32I-NEXT: xor a3, a3, a6
+; RV32I-NEXT: xor a5, a5, t2
+; RV32I-NEXT: xor a1, a1, a4
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: xor a2, a7, s2
+; RV32I-NEXT: xor a3, a3, t0
+; RV32I-NEXT: xor a4, a5, t6
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: xor a1, a2, s5
+; RV32I-NEXT: xor a2, a3, t1
+; RV32I-NEXT: xor a3, a4, t3
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a2, a2, s1
+; RV32I-NEXT: xor a1, a3, s8
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: lw a2, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 80
; RV32I-NEXT: ret
;
; RV64I-LABEL: clmul_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 16
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 32
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 64
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 128
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 256
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 512
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 1024
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: li a1, 1
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: slli a1, a1, 11
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 32
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 64
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 128
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 256
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 512
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 1024
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2048
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 4096
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 8192
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 16384
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 32768
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 65536
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 131072
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 262144
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 524288
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: addi sp, sp, -176
+; RV64I-NEXT: sd ra, 168(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 160(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, a1, 62
+; RV64I-NEXT: slliw s9, a0, 1
+; RV64I-NEXT: slli a3, a1, 61
+; RV64I-NEXT: slliw s10, a0, 2
+; RV64I-NEXT: slli a4, a1, 60
+; RV64I-NEXT: slliw a5, a0, 3
+; RV64I-NEXT: slli t3, a1, 59
+; RV64I-NEXT: slliw s11, a0, 4
+; RV64I-NEXT: slli a6, a1, 58
+; RV64I-NEXT: slliw s1, a0, 5
+; RV64I-NEXT: slli a7, a1, 57
+; RV64I-NEXT: slliw t0, a0, 6
+; RV64I-NEXT: slli t1, a1, 56
+; RV64I-NEXT: slliw s2, a0, 7
+; RV64I-NEXT: slli t2, a1, 55
+; RV64I-NEXT: slliw s3, a0, 8
+; RV64I-NEXT: slli t4, a1, 54
+; RV64I-NEXT: slliw s4, a0, 9
+; RV64I-NEXT: slli t5, a1, 53
+; RV64I-NEXT: slliw s5, a0, 10
+; RV64I-NEXT: slli s6, a1, 52
+; RV64I-NEXT: slliw ra, a0, 11
+; RV64I-NEXT: slli t6, a1, 51
+; RV64I-NEXT: slliw s7, a0, 12
+; RV64I-NEXT: slli s0, a1, 50
+; RV64I-NEXT: slliw s8, a0, 13
+; RV64I-NEXT: srli a2, a2, 63
+; RV64I-NEXT: neg a2, a2
+; RV64I-NEXT: and a2, a2, s9
+; RV64I-NEXT: sd a2, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s9, a1, 49
+; RV64I-NEXT: srli a3, a3, 63
+; RV64I-NEXT: neg a2, a3
+; RV64I-NEXT: and a2, a2, s10
+; RV64I-NEXT: sd a2, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slliw s10, a0, 14
+; RV64I-NEXT: srli a4, a4, 63
+; RV64I-NEXT: neg a4, a4
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a3, a1, 48
+; RV64I-NEXT: srli a5, t3, 63
+; RV64I-NEXT: neg a5, a5
+; RV64I-NEXT: and a2, a5, s11
+; RV64I-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slliw s11, a0, 15
+; RV64I-NEXT: srli a6, a6, 63
+; RV64I-NEXT: neg a6, a6
+; RV64I-NEXT: and a2, a6, s1
+; RV64I-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s1, a1, 47
+; RV64I-NEXT: srli a7, a7, 63
+; RV64I-NEXT: neg a7, a7
+; RV64I-NEXT: and a2, a7, t0
+; RV64I-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slliw a2, a0, 16
+; RV64I-NEXT: srli t0, t1, 63
+; RV64I-NEXT: neg t0, t0
+; RV64I-NEXT: and a4, t0, s2
+; RV64I-NEXT: sd a4, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s2, a1, 46
+; RV64I-NEXT: srli t0, t2, 63
+; RV64I-NEXT: neg t0, t0
+; RV64I-NEXT: and t3, t0, s3
+; RV64I-NEXT: slliw s3, a0, 17
+; RV64I-NEXT: srli t0, t4, 63
+; RV64I-NEXT: neg t0, t0
+; RV64I-NEXT: and t2, t0, s4
+; RV64I-NEXT: slli s4, a1, 45
+; RV64I-NEXT: srli t0, t5, 63
+; RV64I-NEXT: neg t0, t0
+; RV64I-NEXT: and a4, t0, s5
+; RV64I-NEXT: sd a4, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slliw s5, a0, 18
+; RV64I-NEXT: srli t4, s6, 63
+; RV64I-NEXT: neg t4, t4
+; RV64I-NEXT: and t5, t4, ra
+; RV64I-NEXT: slli t4, a1, 44
+; RV64I-NEXT: srli t6, t6, 63
+; RV64I-NEXT: neg t6, t6
+; RV64I-NEXT: and ra, t6, s7
+; RV64I-NEXT: slliw t6, a0, 19
+; RV64I-NEXT: srli s0, s0, 63
+; RV64I-NEXT: neg s0, s0
+; RV64I-NEXT: and s0, s0, s8
+; RV64I-NEXT: slli s8, a1, 43
+; RV64I-NEXT: srli s6, s9, 63
+; RV64I-NEXT: neg s6, s6
+; RV64I-NEXT: and s10, s6, s10
+; RV64I-NEXT: slliw s9, a0, 20
+; RV64I-NEXT: srli a3, a3, 63
+; RV64I-NEXT: neg a3, a3
+; RV64I-NEXT: and s11, a3, s11
+; RV64I-NEXT: slli s6, a1, 42
+; RV64I-NEXT: srli s1, s1, 63
+; RV64I-NEXT: neg a3, s1
+; RV64I-NEXT: and t0, a3, a2
+; RV64I-NEXT: slliw a2, a0, 21
+; RV64I-NEXT: srli s1, s2, 63
+; RV64I-NEXT: neg s1, s1
+; RV64I-NEXT: and s3, s1, s3
+; RV64I-NEXT: slli s1, a1, 41
+; RV64I-NEXT: srli s2, s4, 63
+; RV64I-NEXT: neg s2, s2
+; RV64I-NEXT: and s7, s2, s5
+; RV64I-NEXT: slliw s2, a0, 22
+; RV64I-NEXT: srli t4, t4, 63
+; RV64I-NEXT: neg t4, t4
+; RV64I-NEXT: and t4, t4, t6
+; RV64I-NEXT: slli t6, a1, 40
+; RV64I-NEXT: srli s4, s8, 63
+; RV64I-NEXT: neg s4, s4
+; RV64I-NEXT: and s4, s4, s9
+; RV64I-NEXT: slliw s9, a0, 23
+; RV64I-NEXT: srli s5, s6, 63
+; RV64I-NEXT: neg s5, s5
+; RV64I-NEXT: and s5, s5, a2
+; RV64I-NEXT: slli a2, a1, 39
+; RV64I-NEXT: srli s1, s1, 63
+; RV64I-NEXT: neg s1, s1
+; RV64I-NEXT: and s8, s1, s2
+; RV64I-NEXT: slliw s1, a0, 24
+; RV64I-NEXT: srli t6, t6, 63
+; RV64I-NEXT: neg t6, t6
+; RV64I-NEXT: and t6, t6, s9
+; RV64I-NEXT: slli s2, a1, 38
+; RV64I-NEXT: srli a2, a2, 63
+; RV64I-NEXT: neg a2, a2
+; RV64I-NEXT: and s6, a2, s1
+; RV64I-NEXT: slliw a2, a0, 25
+; RV64I-NEXT: srli s1, s2, 63
+; RV64I-NEXT: neg s1, s1
+; RV64I-NEXT: and a7, s1, a2
+; RV64I-NEXT: slli s1, a1, 37
+; RV64I-NEXT: srli s1, s1, 63
+; RV64I-NEXT: neg s1, s1
+; RV64I-NEXT: slliw s2, a0, 26
+; RV64I-NEXT: and s9, s1, s2
+; RV64I-NEXT: slli s1, a1, 36
+; RV64I-NEXT: srli s1, s1, 63
+; RV64I-NEXT: neg s1, s1
+; RV64I-NEXT: slliw s2, a0, 27
+; RV64I-NEXT: and s1, s1, s2
+; RV64I-NEXT: slli s2, a1, 35
+; RV64I-NEXT: srli s2, s2, 63
+; RV64I-NEXT: neg s2, s2
+; RV64I-NEXT: slliw a4, a0, 28
+; RV64I-NEXT: and a4, s2, a4
+; RV64I-NEXT: slli s2, a1, 34
+; RV64I-NEXT: srli s2, s2, 63
+; RV64I-NEXT: neg s2, s2
+; RV64I-NEXT: slliw a5, a0, 29
+; RV64I-NEXT: and a5, s2, a5
+; RV64I-NEXT: slli s2, a1, 33
+; RV64I-NEXT: srli s2, s2, 63
+; RV64I-NEXT: neg s2, s2
+; RV64I-NEXT: slliw a6, a0, 30
+; RV64I-NEXT: and a6, s2, a6
+; RV64I-NEXT: lui s2, 524288
+; RV64I-NEXT: and s2, a1, s2
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: srai a1, a1, 63
+; RV64I-NEXT: and a1, a1, a0
+; RV64I-NEXT: ld a3, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a1, a3
+; RV64I-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: ld a2, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t1, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a2, t1
+; RV64I-NEXT: ld t1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, t3
+; RV64I-NEXT: xor t3, t5, ra
+; RV64I-NEXT: xor t0, t0, s3
+; RV64I-NEXT: xor t5, s8, t6
+; RV64I-NEXT: xor a5, a5, a6
+; RV64I-NEXT: xor a1, a3, a1
+; RV64I-NEXT: ld a3, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a2, a3
+; RV64I-NEXT: xor a3, t1, t2
+; RV64I-NEXT: xor a6, t3, s0
+; RV64I-NEXT: xor t0, t0, s7
+; RV64I-NEXT: xor t1, t5, s6
+; RV64I-NEXT: slliw a0, a0, 31
+; RV64I-NEXT: sext.w s2, s2
+; RV64I-NEXT: seqz t2, s2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and a0, t2, a0
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: ld a2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: xor a3, a6, s10
+; RV64I-NEXT: xor a6, t0, t4
+; RV64I-NEXT: xor a7, t1, a7
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: xor a2, a3, s11
+; RV64I-NEXT: xor a3, a6, s4
+; RV64I-NEXT: xor a6, a7, s9
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: xor a2, a3, s5
+; RV64I-NEXT: xor a3, a6, s1
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: xor a3, a3, a4
+; RV64I-NEXT: xor a1, a1, a3
+; RV64I-NEXT: xor a0, a5, a0
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: ld ra, 168(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 160(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 176
; RV64I-NEXT: ret
;
; RV32IM-LABEL: clmul_i32:
@@ -1518,1109 +1678,1635 @@ define i32 @clmul_i32(i32 %a, i32 %b) nounwind {
define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: clmul_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -176
-; RV32I-NEXT: sw ra, 172(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 168(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 164(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 160(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 156(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 152(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 148(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 144(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 140(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 136(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s9, 132(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s10, 128(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s11, 124(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: mv s3, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: lui a7, 16
-; RV32I-NEXT: srli a1, s0, 24
-; RV32I-NEXT: slli a2, s0, 24
-; RV32I-NEXT: lui a3, 61681
-; RV32I-NEXT: lui a4, 209715
-; RV32I-NEXT: lui t0, 349525
-; RV32I-NEXT: srli a5, s3, 8
-; RV32I-NEXT: srli a6, s3, 24
-; RV32I-NEXT: addi s10, a7, -256
-; RV32I-NEXT: and a0, a0, s10
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: slli a1, s3, 24
-; RV32I-NEXT: addi s8, a3, -241
-; RV32I-NEXT: addi s6, a4, 819
-; RV32I-NEXT: and a3, a5, s10
-; RV32I-NEXT: or a3, a3, a6
-; RV32I-NEXT: and a4, s0, s10
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a2, a2, a4
-; RV32I-NEXT: and a4, s3, s10
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a1, a1, a4
-; RV32I-NEXT: addi s9, t0, 1365
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: srli a2, a0, 4
-; RV32I-NEXT: and a0, a0, s8
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, s8
-; RV32I-NEXT: and a2, a2, s8
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: and a3, a3, s8
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 2
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: and a2, a2, s6
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, s6
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 1
-; RV32I-NEXT: and a0, a0, s9
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, s9
-; RV32I-NEXT: and a2, a2, s9
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, s9
+; RV32I-NEXT: addi sp, sp, -368
+; RV32I-NEXT: sw ra, 364(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 360(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 356(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 352(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 348(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 344(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 340(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 336(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 332(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s8, 328(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s9, 324(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s10, 320(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s11, 316(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv ra, a3
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: lui s10, 16
+; RV32I-NEXT: srli a5, a0, 24
+; RV32I-NEXT: srli a6, a2, 8
+; RV32I-NEXT: srli a7, a2, 24
+; RV32I-NEXT: mv t1, a1
+; RV32I-NEXT: sw a1, 300(sp) # 4-byte Folded Spill
; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: or s4, a2, a0
-; RV32I-NEXT: or s7, a3, a1
-; RV32I-NEXT: andi a1, s7, 2
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: andi a1, s7, 1
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, a0, s5
-; RV32I-NEXT: andi a1, s7, 4
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: andi a1, s7, 8
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: andi a1, s7, 16
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: andi a1, s7, 32
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s7, 64
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: andi a1, s7, 128
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: andi a1, s7, 256
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s7, 512
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s7, 1024
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a1, s7, a1
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: andi s6, a2, 2
+; RV32I-NEXT: slli t0, t1, 2
+; RV32I-NEXT: andi s4, a2, 4
+; RV32I-NEXT: slli s0, t1, 3
+; RV32I-NEXT: andi s3, a2, 8
+; RV32I-NEXT: slli t5, t1, 4
+; RV32I-NEXT: andi s1, a2, 16
+; RV32I-NEXT: slli t2, t1, 5
+; RV32I-NEXT: andi t6, a2, 32
+; RV32I-NEXT: slli t1, t1, 6
+; RV32I-NEXT: andi t4, a2, 64
+; RV32I-NEXT: andi t3, a2, 128
+; RV32I-NEXT: slli s8, a0, 1
+; RV32I-NEXT: andi s5, a3, 2
+; RV32I-NEXT: slli s9, a0, 2
+; RV32I-NEXT: andi s11, a3, 4
+; RV32I-NEXT: slli s7, a0, 3
+; RV32I-NEXT: addi a3, s10, -256
+; RV32I-NEXT: sw a3, 312(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a3
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: sw a4, 308(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi s10, ra, 8
+; RV32I-NEXT: and a4, a6, a3
+; RV32I-NEXT: or a3, a4, a7
+; RV32I-NEXT: sw a3, 304(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 4
+; RV32I-NEXT: seqz a7, s6
+; RV32I-NEXT: seqz a4, s5
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: addi a5, a4, -1
+; RV32I-NEXT: and a4, a7, a1
+; RV32I-NEXT: and a1, a5, s8
+; RV32I-NEXT: sw a1, 252(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a7, s8
+; RV32I-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi s6, ra, 16
+; RV32I-NEXT: seqz a1, s4
+; RV32I-NEXT: seqz s4, s11
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi s5, s4, -1
+; RV32I-NEXT: and s4, a1, t0
+; RV32I-NEXT: and s2, s5, s9
+; RV32I-NEXT: and a7, a1, s9
+; RV32I-NEXT: slli s8, a0, 5
+; RV32I-NEXT: seqz s3, s3
+; RV32I-NEXT: seqz s5, s10
+; RV32I-NEXT: addi s9, s3, -1
+; RV32I-NEXT: addi s3, s5, -1
+; RV32I-NEXT: and s5, s9, s0
+; RV32I-NEXT: and s3, s3, s7
+; RV32I-NEXT: and s0, s9, s7
+; RV32I-NEXT: andi s7, ra, 32
+; RV32I-NEXT: seqz s1, s1
+; RV32I-NEXT: seqz s6, s6
+; RV32I-NEXT: addi s9, s1, -1
+; RV32I-NEXT: addi s6, s6, -1
+; RV32I-NEXT: and s1, s9, t5
+; RV32I-NEXT: and t5, s6, a6
+; RV32I-NEXT: and a5, s9, a6
+; RV32I-NEXT: slli s9, a0, 6
+; RV32I-NEXT: seqz t6, t6
+; RV32I-NEXT: seqz s6, s7
+; RV32I-NEXT: addi s7, t6, -1
+; RV32I-NEXT: addi t6, s6, -1
+; RV32I-NEXT: and s6, s7, t2
+; RV32I-NEXT: and t6, t6, s8
+; RV32I-NEXT: and t2, s7, s8
+; RV32I-NEXT: andi s7, ra, 64
+; RV32I-NEXT: seqz t4, t4
+; RV32I-NEXT: seqz s7, s7
+; RV32I-NEXT: addi t4, t4, -1
+; RV32I-NEXT: addi s7, s7, -1
+; RV32I-NEXT: and a1, t4, t1
+; RV32I-NEXT: sw a1, 280(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, s7, s9
+; RV32I-NEXT: sw a1, 284(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, t4, s9
+; RV32I-NEXT: sw a1, 288(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi t1, ra, 128
+; RV32I-NEXT: seqz t3, t3
+; RV32I-NEXT: seqz t1, t1
+; RV32I-NEXT: addi s7, t3, -1
+; RV32I-NEXT: addi t1, t1, -1
+; RV32I-NEXT: lw a3, 300(sp) # 4-byte Folded Reload
+; RV32I-NEXT: slli t3, a3, 7
+; RV32I-NEXT: and t4, s7, t3
+; RV32I-NEXT: slli s8, a0, 7
+; RV32I-NEXT: and a1, t1, s8
+; RV32I-NEXT: and t1, s7, s8
+; RV32I-NEXT: andi s7, a2, 256
+; RV32I-NEXT: seqz s7, s7
+; RV32I-NEXT: mv t0, ra
+; RV32I-NEXT: andi s8, ra, 256
+; RV32I-NEXT: seqz s8, s8
+; RV32I-NEXT: addi s7, s7, -1
+; RV32I-NEXT: addi s8, s8, -1
+; RV32I-NEXT: slli s9, a3, 8
+; RV32I-NEXT: and s9, s7, s9
+; RV32I-NEXT: slli s10, a0, 8
+; RV32I-NEXT: and s8, s8, s10
+; RV32I-NEXT: and s7, s7, s10
+; RV32I-NEXT: andi s10, a2, 512
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: andi s11, ra, 512
+; RV32I-NEXT: seqz s11, s11
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: addi s11, s11, -1
+; RV32I-NEXT: slli ra, a3, 9
+; RV32I-NEXT: and a6, s10, ra
+; RV32I-NEXT: sw a6, 248(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli ra, a0, 9
+; RV32I-NEXT: and a6, s11, ra
+; RV32I-NEXT: sw a6, 256(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a6, s10, ra
+; RV32I-NEXT: sw a6, 260(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi s10, a2, 1024
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: andi s11, t0, 1024
+; RV32I-NEXT: seqz s11, s11
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: addi s11, s11, -1
+; RV32I-NEXT: slli ra, a3, 10
+; RV32I-NEXT: and a6, s10, ra
+; RV32I-NEXT: sw a6, 264(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli ra, a0, 10
+; RV32I-NEXT: and a6, s11, ra
+; RV32I-NEXT: sw a6, 268(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a6, s10, ra
+; RV32I-NEXT: sw a6, 272(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli s10, a2, 31
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: srli s11, t0, 31
+; RV32I-NEXT: seqz s11, s11
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: addi s11, s11, -1
+; RV32I-NEXT: slli ra, a3, 31
+; RV32I-NEXT: and a6, s10, ra
+; RV32I-NEXT: sw a6, 296(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli ra, a0, 31
+; RV32I-NEXT: and a6, s11, ra
+; RV32I-NEXT: sw a6, 292(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a6, s10, ra
+; RV32I-NEXT: sw a6, 276(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a6, 312(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and s10, a2, a6
+; RV32I-NEXT: slli s10, s10, 8
+; RV32I-NEXT: slli s11, a2, 24
+; RV32I-NEXT: or t3, s11, s10
+; RV32I-NEXT: sw t3, 224(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi s10, a2, 1
+; RV32I-NEXT: mv t3, a2
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: and s11, s10, a3
+; RV32I-NEXT: xor a2, s11, a4
+; RV32I-NEXT: sw a2, 220(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a2, s4, s5
+; RV32I-NEXT: sw a2, 216(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a2, s1, s6
+; RV32I-NEXT: sw a2, 212(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a2, t4, s9
+; RV32I-NEXT: sw a2, 208(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, t0, 1
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and a4, a4, a0
+; RV32I-NEXT: lw a2, 252(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a4, a2
+; RV32I-NEXT: sw a2, 204(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a2, s2, s3
+; RV32I-NEXT: sw a2, 188(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a2, t5, t6
+; RV32I-NEXT: sw a2, 184(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a1, a1, s8
+; RV32I-NEXT: sw a1, 180(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, s10, a0
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: lw a2, 244(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a4, a2
+; RV32I-NEXT: xor a0, a7, s0
+; RV32I-NEXT: sw a0, 200(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, a5, t2
+; RV32I-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, t1, s7
+; RV32I-NEXT: sw a0, 192(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 12
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 228(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 12
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 232(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 13
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 236(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 13
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 240(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 252(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 14
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 128(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 14
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 132(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 164(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: xor s11, s11, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 15
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 120(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 15
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 124(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 16
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 136(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 16
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 144(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 156(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 17
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 148(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 17
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 152(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 160(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 18
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 168(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 18
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 172(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 176(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 19
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 19
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 20
+; RV32I-NEXT: and s11, a1, a5
+; RV32I-NEXT: slli a5, s2, 20
+; RV32I-NEXT: and ra, a4, a5
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: lui a1, 1024
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: and a1, s7, a1
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 21
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 21
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a0, 1024
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 22
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 22
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 80(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, a3, 23
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: sw a5, 108(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s2, 23
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 112(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, s2, a6
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a6, s2, 24
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a7, a3, 24
+; RV32I-NEXT: and a0, a1, a7
+; RV32I-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a4, a6
+; RV32I-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a1, a6
+; RV32I-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a6, a3, 25
+; RV32I-NEXT: and t1, a1, a6
+; RV32I-NEXT: slli a6, s2, 25
+; RV32I-NEXT: and s8, a4, a6
+; RV32I-NEXT: and a0, a1, a6
+; RV32I-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and a4, t0, a0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a6, a4, -1
+; RV32I-NEXT: slli a4, a3, 26
+; RV32I-NEXT: and a4, a1, a4
+; RV32I-NEXT: slli t2, s2, 26
+; RV32I-NEXT: and s6, a6, t2
+; RV32I-NEXT: and s10, a1, t2
; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and t2, t0, a0
+; RV32I-NEXT: seqz t2, t2
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi t4, t2, -1
+; RV32I-NEXT: slli t2, a3, 27
+; RV32I-NEXT: and s9, a1, t2
+; RV32I-NEXT: slli s0, s2, 27
+; RV32I-NEXT: and a0, t4, s0
+; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: lui a1, 131072
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: and a1, s7, a1
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and t5, t0, a0
+; RV32I-NEXT: seqz t5, t5
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi t6, t5, -1
+; RV32I-NEXT: slli t5, a3, 28
+; RV32I-NEXT: and a0, a1, t5
+; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s1, s2, 28
+; RV32I-NEXT: and a0, t6, s1
+; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, s1
+; RV32I-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a0, 131072
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and s3, t0, a0
+; RV32I-NEXT: seqz s3, s3
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi s3, s3, -1
+; RV32I-NEXT: slli s4, a3, 29
+; RV32I-NEXT: and a0, a1, s4
+; RV32I-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s5, s2, 29
+; RV32I-NEXT: and a0, s3, s5
+; RV32I-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a1, s5
+; RV32I-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: lui a1, 349525
-; RV32I-NEXT: addi a1, a1, 1364
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: srli a2, a0, 8
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: and a2, a2, s10
-; RV32I-NEXT: and a4, a0, s10
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: or a0, a0, a4
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: srli a2, a0, 4
-; RV32I-NEXT: and a0, a0, s8
-; RV32I-NEXT: and a2, a2, s8
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: srli a2, a0, 2
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: and a2, a2, s6
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: srli a2, a0, 1
-; RV32I-NEXT: and a0, a0, s9
-; RV32I-NEXT: and a1, a2, a1
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: sw a0, 120(sp) # 4-byte Folded Spill
-; RV32I-NEXT: andi a1, s3, 2
-; RV32I-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 1
-; RV32I-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, a0, s4
-; RV32I-NEXT: andi a1, s3, 4
-; RV32I-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 8
-; RV32I-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s3, 16
-; RV32I-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 32
-; RV32I-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s3, 64
-; RV32I-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s3, 128
-; RV32I-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 256
-; RV32I-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s3, 512
-; RV32I-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s3, 1024
-; RV32I-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lw s6, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, s3, s6
-; RV32I-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: and a1, t3, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: and s3, t0, a0
+; RV32I-NEXT: seqz s3, s3
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi s5, s3, -1
+; RV32I-NEXT: slli s3, a3, 30
+; RV32I-NEXT: and a0, a1, s3
+; RV32I-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s7, s2, 30
+; RV32I-NEXT: and a0, s5, s7
+; RV32I-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and s7, a1, s7
+; RV32I-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or s3, a5, a0
+; RV32I-NEXT: lw a0, 304(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or s5, a1, a0
+; RV32I-NEXT: lw a0, 220(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a0, a1
+; RV32I-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a1, a0
+; RV32I-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a1, a0
+; RV32I-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, a0, a1
+; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, a0, s11
+; RV32I-NEXT: xor t5, t1, a4
+; RV32I-NEXT: lw a0, 204(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 188(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, a0, s0
+; RV32I-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, a1, a0
+; RV32I-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 180(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a1, a0
+; RV32I-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 124(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, a0, s1
+; RV32I-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, a0, ra
+; RV32I-NEXT: xor s8, s8, s6
+; RV32I-NEXT: lw a0, 200(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, a2, a0
+; RV32I-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a4, 196(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, a0
+; RV32I-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 192(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a0
+; RV32I-NEXT: slli a2, a3, 11
+; RV32I-NEXT: slli s2, s2, 11
+; RV32I-NEXT: li s6, 1
+; RV32I-NEXT: slli s6, s6, 11
+; RV32I-NEXT: and a0, t3, s6
+; RV32I-NEXT: and a1, t0, s6
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: and a1, a1, s2
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: lw a3, 164(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t0, 140(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, a3, t0
+; RV32I-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t0, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, t0
+; RV32I-NEXT: lw t0, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, t0, s10
+; RV32I-NEXT: xor a6, t2, a6
+; RV32I-NEXT: lw t0, 264(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 228(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, t0
+; RV32I-NEXT: lw t0, 136(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, s4, t0
+; RV32I-NEXT: lw t0, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, s11, t0
+; RV32I-NEXT: xor t5, t5, s9
+; RV32I-NEXT: xor t0, s0, t4
+; RV32I-NEXT: lw t4, 268(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t1, t4
+; RV32I-NEXT: lw t4, 232(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, t4
+; RV32I-NEXT: lw t4, 144(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, s1, t4
+; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, ra, s0
+; RV32I-NEXT: lw s1, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s8, s1
+; RV32I-NEXT: xor a4, t6, a4
+; RV32I-NEXT: lw t6, 272(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t6
+; RV32I-NEXT: lw t6, 244(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, t6
+; RV32I-NEXT: lw t6, 156(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, s2, t6
+; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, s1
+; RV32I-NEXT: lw s1, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s10, s1
+; RV32I-NEXT: xor a6, a6, a7
+; RV32I-NEXT: lw a7, 236(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a7
+; RV32I-NEXT: lw a7, 148(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, t2, a7
+; RV32I-NEXT: lw t2, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, t3, t2
+; RV32I-NEXT: lw t3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t5, t3
+; RV32I-NEXT: xor t0, t0, t1
+; RV32I-NEXT: lw t1, 240(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, t1
+; RV32I-NEXT: lw t1, 152(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t4, t1
+; RV32I-NEXT: lw t4, 80(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, s0, t4
+; RV32I-NEXT: lw t5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, s8, t5
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a5, 252(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a0, a5
+; RV32I-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, t6, a0
+; RV32I-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a0
+; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s2, a0
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi s4, a0, -241
+; RV32I-NEXT: srli s2, s3, 4
+; RV32I-NEXT: and s1, s3, s4
+; RV32I-NEXT: and s2, s2, s4
+; RV32I-NEXT: slli s1, s1, 4
+; RV32I-NEXT: or s1, s2, s1
+; RV32I-NEXT: srli s2, s5, 4
+; RV32I-NEXT: and s3, s5, s4
+; RV32I-NEXT: and s2, s2, s4
+; RV32I-NEXT: slli s3, s3, 4
+; RV32I-NEXT: or s2, s2, s3
+; RV32I-NEXT: xor a2, a6, a2
+; RV32I-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a7, a0
+; RV32I-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, t2, a0
+; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, t3, a0
+; RV32I-NEXT: xor a1, t0, a1
+; RV32I-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t1, a0
+; RV32I-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t4, a0
+; RV32I-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t5, a0
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, t6, a0
+; RV32I-NEXT: lw a0, 116(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a0
+; RV32I-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, s0, a0
+; RV32I-NEXT: xor a2, a2, a6
+; RV32I-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a7, a0
+; RV32I-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, t2, a0
+; RV32I-NEXT: xor a1, a1, t0
+; RV32I-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t1, a0
+; RV32I-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t3, a0
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a0, 104(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a0
+; RV32I-NEXT: xor a5, t4, s7
+; RV32I-NEXT: xor a2, a2, a6
+; RV32I-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a7, a0
+; RV32I-NEXT: xor a7, a1, t0
+; RV32I-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t1, a0
+; RV32I-NEXT: xor a3, a4, a3
+; RV32I-NEXT: sw a3, 308(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a5, a0
+; RV32I-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi t6, a1, 819
+; RV32I-NEXT: srli a3, s1, 2
+; RV32I-NEXT: and s1, s1, t6
+; RV32I-NEXT: and a3, a3, t6
+; RV32I-NEXT: slli s1, s1, 2
+; RV32I-NEXT: or a3, a3, s1
+; RV32I-NEXT: srli a4, s2, 2
+; RV32I-NEXT: and a5, s2, t6
+; RV32I-NEXT: and a4, a4, t6
+; RV32I-NEXT: slli a5, a5, 2
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: xor a2, a2, a6
+; RV32I-NEXT: xor a5, a7, t0
+; RV32I-NEXT: xor a2, a5, a2
+; RV32I-NEXT: sw a2, 300(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s2, 349525
+; RV32I-NEXT: addi s2, s2, 1365
+; RV32I-NEXT: srli a2, a3, 1
+; RV32I-NEXT: and a3, a3, s2
+; RV32I-NEXT: and a2, a2, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or t5, a2, a3
+; RV32I-NEXT: srli a2, a4, 1
+; RV32I-NEXT: and a3, a4, s2
+; RV32I-NEXT: and a2, a2, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or t2, a2, a3
+; RV32I-NEXT: srli a3, a3, 31
+; RV32I-NEXT: seqz a2, a3
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 31
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 296(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, t2, 2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 1
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 292(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, t2, 4
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 2
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 288(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, t2, 8
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 3
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 284(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, t2, 16
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 4
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 280(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, t2, 32
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli s1, t5, 5
+; RV32I-NEXT: and a2, a2, s1
+; RV32I-NEXT: sw a2, 272(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, t2, 64
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli s0, t5, 6
+; RV32I-NEXT: and a2, a2, s0
+; RV32I-NEXT: sw a2, 276(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, t2, 128
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 7
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 268(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, t2, 256
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 8
+; RV32I-NEXT: and s5, a2, a3
+; RV32I-NEXT: andi a2, t2, 512
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 9
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 260(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, t2, 1024
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 10
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 264(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a2, t2, s6
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 11
+; RV32I-NEXT: and s1, a2, a3
; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
+; RV32I-NEXT: and a2, t2, a0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 12
+; RV32I-NEXT: and s9, a2, a3
; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a2, t2, a0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 13
+; RV32I-NEXT: and s7, a2, a3
; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: and a2, t2, a0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 14
+; RV32I-NEXT: and s6, a2, a3
; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
+; RV32I-NEXT: and a2, t2, a0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 15
+; RV32I-NEXT: and s8, a2, a3
; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
+; RV32I-NEXT: and a2, t2, a0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 16
+; RV32I-NEXT: and s10, a2, a3
; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
+; RV32I-NEXT: and a2, t2, a0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t5, 17
+; RV32I-NEXT: and s0, a2, a3
; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: and a3, t2, a0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, t5, 18
+; RV32I-NEXT: and ra, a3, a4
; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: and a3, t2, a0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, t5, 19
+; RV32I-NEXT: and s11, a3, a4
; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
+; RV32I-NEXT: and a3, t2, a0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, t5, 20
+; RV32I-NEXT: and a4, a3, a4
; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
+; RV32I-NEXT: and a3, t2, a0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a0, a3, -1
+; RV32I-NEXT: slli a3, t5, 21
+; RV32I-NEXT: and a3, a0, a3
; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
+; RV32I-NEXT: and a0, t2, a0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli t0, t5, 22
+; RV32I-NEXT: and a2, a0, t0
; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
+; RV32I-NEXT: and t0, t2, a0
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: slli a1, t5, 23
+; RV32I-NEXT: and a1, t0, a1
; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s4, s5, a0
+; RV32I-NEXT: and t0, t2, a0
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: slli a5, t5, 24
+; RV32I-NEXT: and a5, t0, a5
; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s3, a0
-; RV32I-NEXT: sw a1, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s7, a0
+; RV32I-NEXT: and t0, t2, a0
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: slli a6, t5, 25
+; RV32I-NEXT: and a6, t0, a6
; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and s5, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s5
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s8, s7, a0
+; RV32I-NEXT: and t0, t2, a0
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: slli a7, t5, 26
+; RV32I-NEXT: and a7, t0, a7
; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and s7, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s7
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s8, a0
+; RV32I-NEXT: and t0, t2, a0
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: slli t1, t5, 27
+; RV32I-NEXT: and t0, t0, t1
; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and s8, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s10, s9, a0
+; RV32I-NEXT: and t1, t2, a0
+; RV32I-NEXT: seqz t1, t1
+; RV32I-NEXT: addi t1, t1, -1
+; RV32I-NEXT: slli t3, t5, 28
+; RV32I-NEXT: and t1, t1, t3
; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and s9, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s9
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s10, a0
+; RV32I-NEXT: and t3, t2, a0
+; RV32I-NEXT: seqz t3, t3
+; RV32I-NEXT: addi t3, t3, -1
+; RV32I-NEXT: slli t4, t5, 29
+; RV32I-NEXT: and t3, t3, t4
; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and s10, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s10
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s1, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: andi a1, s1, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: andi a1, s1, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: and a1, s1, s6
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 1
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 2
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a1, 4
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 8
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 16
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 32
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 64
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a1, 128
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 256
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 512
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 1024
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 2048
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 4096
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a1, 8192
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 16384
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 32768
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 65536
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 131072
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 262144
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 524288
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: xor a0, a0, s4
-; RV32I-NEXT: lw s1, 120(sp) # 4-byte Folded Reload
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, a0, s2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: mv a1, s5
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: mv a1, s7
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: mv a1, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: mv a1, s9
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: mv a1, s10
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: lw ra, 172(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 168(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 164(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 160(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 156(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 152(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 148(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 144(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 140(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 136(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s9, 132(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s10, 128(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s11, 124(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 176
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: clmul_i64:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 16
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 32
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 64
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s4, s3, a0
-; RV64I-NEXT: andi a1, s1, 128
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 256
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 512
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 1024
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: li s3, 1
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: slli a1, s3, 11
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 32
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 64
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 128
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 256
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 512
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 1024
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2048
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 4096
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 8192
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 16384
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 32768
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 65536
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 131072
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 262144
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: srliw a1, s1, 31
-; RV64I-NEXT: slli a1, a1, 31
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: slli a1, s3, 32
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: slli a1, s3, 33
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 34
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 35
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 36
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 37
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 38
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: slli a1, s3, 39
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: slli a1, s3, 40
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 41
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 42
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 43
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 44
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 45
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 46
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: slli a1, s3, 47
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: slli a1, s3, 48
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 49
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 50
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 51
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 52
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 53
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 54
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 55
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: slli a1, s3, 56
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: slli a1, s3, 57
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 58
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 59
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 60
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 61
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: slli a1, s3, 62
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: srli a1, s1, 63
-; RV64I-NEXT: slli a1, a1, 63
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: ret
-;
-; RV32IM-LABEL: clmul_i64:
-; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -272
-; RV32IM-NEXT: sw ra, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s0, 264(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and t4, t2, a0
+; RV32I-NEXT: andi t2, t2, 1
+; RV32I-NEXT: seqz t2, t2
+; RV32I-NEXT: addi t2, t2, -1
+; RV32I-NEXT: and t2, t2, t5
+; RV32I-NEXT: slli t5, t5, 30
+; RV32I-NEXT: seqz t4, t4
+; RV32I-NEXT: addi t4, t4, -1
+; RV32I-NEXT: and t4, t4, t5
+; RV32I-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, t2, a0
+; RV32I-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t5, 284(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, a0, t5
+; RV32I-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 272(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, s3
+; RV32I-NEXT: lw s3, 268(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, s3, s5
+; RV32I-NEXT: xor s1, s1, s9
+; RV32I-NEXT: xor s0, s10, s0
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: xor a2, t3, t4
+; RV32I-NEXT: xor t2, t2, t5
+; RV32I-NEXT: lw t3, 276(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, t3
+; RV32I-NEXT: lw t3, 260(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, s5, t3
+; RV32I-NEXT: xor t4, s1, s7
+; RV32I-NEXT: xor t5, s0, ra
+; RV32I-NEXT: xor a1, a1, a5
+; RV32I-NEXT: lw a5, 296(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a5
+; RV32I-NEXT: xor a0, t2, a0
+; RV32I-NEXT: lw a5, 264(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, t3, a5
+; RV32I-NEXT: xor t2, t4, s6
+; RV32I-NEXT: xor t3, t5, s11
+; RV32I-NEXT: xor a1, a1, a6
+; RV32I-NEXT: xor a0, a0, a5
+; RV32I-NEXT: xor a5, t2, s8
+; RV32I-NEXT: xor a4, t3, a4
+; RV32I-NEXT: xor a1, a1, a7
+; RV32I-NEXT: xor a0, a0, a5
+; RV32I-NEXT: xor a3, a4, a3
+; RV32I-NEXT: xor a1, a1, t0
+; RV32I-NEXT: xor a0, a0, a3
+; RV32I-NEXT: xor a1, a1, t1
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: srli a2, a0, 24
+; RV32I-NEXT: lw a3, 312(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: and a2, a0, a3
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: and a0, a0, s4
+; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: and a0, a0, t6
+; RV32I-NEXT: and a1, a1, t6
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a1, 349525
+; RV32I-NEXT: addi a1, a1, 1364
+; RV32I-NEXT: and a2, a0, s2
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: slli a2, a2, 1
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: lw a1, 300(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a0, a1
+; RV32I-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 304(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: lw ra, 364(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 360(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 356(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 352(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 348(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 344(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 340(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 336(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 332(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 328(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 324(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 320(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 316(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 368
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: clmul_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -576
+; RV64I-NEXT: sd ra, 568(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 560(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 552(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 544(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 536(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 528(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 520(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 512(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 504(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 496(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 488(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 480(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 472(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s6, a0
+; RV64I-NEXT: slli a6, a0, 1
+; RV64I-NEXT: andi s10, a1, 2
+; RV64I-NEXT: slli a3, a0, 2
+; RV64I-NEXT: andi s11, a1, 4
+; RV64I-NEXT: slli t0, a0, 3
+; RV64I-NEXT: andi s9, a1, 8
+; RV64I-NEXT: slli a0, a0, 4
+; RV64I-NEXT: andi s8, a1, 16
+; RV64I-NEXT: slli t5, s6, 5
+; RV64I-NEXT: andi s7, a1, 32
+; RV64I-NEXT: slli a4, s6, 6
+; RV64I-NEXT: andi s4, a1, 64
+; RV64I-NEXT: slli t6, s6, 7
+; RV64I-NEXT: andi s1, a1, 128
+; RV64I-NEXT: slli t3, s6, 8
+; RV64I-NEXT: andi s0, a1, 256
+; RV64I-NEXT: slli s3, s6, 9
+; RV64I-NEXT: andi t1, a1, 512
+; RV64I-NEXT: slli ra, s6, 10
+; RV64I-NEXT: andi a2, a1, 1024
+; RV64I-NEXT: lui s2, 1
+; RV64I-NEXT: lui s5, 2
+; RV64I-NEXT: lui a7, 16
+; RV64I-NEXT: lui t2, 32
+; RV64I-NEXT: seqz t4, s10
+; RV64I-NEXT: addi t4, t4, -1
+; RV64I-NEXT: and a5, t4, a6
+; RV64I-NEXT: sd a5, 448(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a5, 64
+; RV64I-NEXT: seqz a6, s11
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a3, a6, a3
+; RV64I-NEXT: sd a3, 440(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a3, 128
+; RV64I-NEXT: seqz a6, s9
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a6, a6, t0
+; RV64I-NEXT: sd a6, 432(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui t4, 256
+; RV64I-NEXT: seqz a6, s8
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a0, a6, a0
+; RV64I-NEXT: sd a0, 424(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui t0, 512
+; RV64I-NEXT: seqz a6, s7
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a0, a6, t5
+; RV64I-NEXT: sd a0, 416(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui s7, 1024
+; RV64I-NEXT: seqz a6, s4
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a0, a6, a4
+; RV64I-NEXT: sd a0, 456(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui s10, 2048
+; RV64I-NEXT: seqz a4, s1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a0, a4, t6
+; RV64I-NEXT: sd a0, 400(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a6, 4096
+; RV64I-NEXT: seqz a4, s0
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a0, a4, t3
+; RV64I-NEXT: sd a0, 392(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, s6, 31
+; RV64I-NEXT: seqz a4, t1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, s3
+; RV64I-NEXT: sd a4, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sraiw a4, a1, 31
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a2, a2, ra
+; RV64I-NEXT: sd a2, 464(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, s6, 63
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a0, a4, a0
+; RV64I-NEXT: sd a0, 376(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srli a4, a1, 63
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a2, a4, a2
+; RV64I-NEXT: sd a2, 384(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 8192
+; RV64I-NEXT: and a0, a1, s2
+; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, s5
+; RV64I-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a0, 4
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: sd a0, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a0, 8
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, a7
+; RV64I-NEXT: sd a0, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, t2
+; RV64I-NEXT: sd a0, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a5, a1, a5
+; RV64I-NEXT: sd a5, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a3, a1, a3
+; RV64I-NEXT: sd a3, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, t4
+; RV64I-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, t0
+; RV64I-NEXT: sd a0, 288(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, s7
+; RV64I-NEXT: sd a0, 312(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, s10
+; RV64I-NEXT: sd a0, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, a6
+; RV64I-NEXT: sd a0, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a1, a2
+; RV64I-NEXT: sd a2, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 16384
+; RV64I-NEXT: and a2, a1, a2
+; RV64I-NEXT: sd a2, 160(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 32768
+; RV64I-NEXT: and a2, a1, a2
+; RV64I-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 65536
+; RV64I-NEXT: and a2, a1, a2
+; RV64I-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 131072
+; RV64I-NEXT: and a2, a1, a2
+; RV64I-NEXT: sd a2, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 262144
+; RV64I-NEXT: and a2, a1, a2
+; RV64I-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: slli a3, a0, 11
+; RV64I-NEXT: slli s0, a0, 32
+; RV64I-NEXT: slli t6, a0, 33
+; RV64I-NEXT: slli s3, a0, 34
+; RV64I-NEXT: slli s7, a0, 35
+; RV64I-NEXT: slli s10, a0, 36
+; RV64I-NEXT: slli ra, a0, 37
+; RV64I-NEXT: slli a4, a0, 38
+; RV64I-NEXT: slli a5, a0, 39
+; RV64I-NEXT: slli a6, a0, 40
+; RV64I-NEXT: slli a7, a0, 41
+; RV64I-NEXT: slli t0, a0, 42
+; RV64I-NEXT: slli t1, a0, 43
+; RV64I-NEXT: slli t2, a0, 44
+; RV64I-NEXT: slli t3, a0, 45
+; RV64I-NEXT: slli t4, a0, 46
+; RV64I-NEXT: slli a2, a0, 47
+; RV64I-NEXT: sd a2, 360(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t5, a0, 48
+; RV64I-NEXT: slli s2, a0, 49
+; RV64I-NEXT: slli s1, a0, 50
+; RV64I-NEXT: slli s4, a0, 51
+; RV64I-NEXT: slli s5, a0, 52
+; RV64I-NEXT: slli s8, a0, 53
+; RV64I-NEXT: slli s9, a0, 54
+; RV64I-NEXT: slli s11, a0, 55
+; RV64I-NEXT: slli a2, a0, 56
+; RV64I-NEXT: sd a2, 368(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, a0, 57
+; RV64I-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, a0, 58
+; RV64I-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, a0, 59
+; RV64I-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, a0, 60
+; RV64I-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, a0, 61
+; RV64I-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, a0, 62
+; RV64I-NEXT: and a3, a1, a3
+; RV64I-NEXT: sd a3, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and s0, a1, s0
+; RV64I-NEXT: and a3, a1, t6
+; RV64I-NEXT: sd a3, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and t6, a1, s3
+; RV64I-NEXT: and s7, a1, s7
+; RV64I-NEXT: and s10, a1, s10
+; RV64I-NEXT: and ra, a1, ra
+; RV64I-NEXT: and a4, a1, a4
+; RV64I-NEXT: sd a4, 168(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a5, a1, a5
+; RV64I-NEXT: sd a5, 176(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a3, a1, a6
+; RV64I-NEXT: sd a3, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a3, a1, a7
+; RV64I-NEXT: sd a3, 192(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a3, a1, t0
+; RV64I-NEXT: sd a3, 208(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a3, a1, t1
+; RV64I-NEXT: sd a3, 224(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a3, a1, t2
+; RV64I-NEXT: sd a3, 232(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a3, a1, t3
+; RV64I-NEXT: sd a3, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a3, a1, t4
+; RV64I-NEXT: sd a3, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 360(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: sd a0, 272(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, t5
+; RV64I-NEXT: sd a0, 280(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, s2
+; RV64I-NEXT: sd a0, 296(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and s1, a1, s1
+; RV64I-NEXT: sd s1, 304(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, s4
+; RV64I-NEXT: sd a0, 320(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, s5
+; RV64I-NEXT: sd a0, 328(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, s8
+; RV64I-NEXT: sd a0, 336(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a1, s9
+; RV64I-NEXT: sd a0, 344(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and t4, a1, s11
+; RV64I-NEXT: ld a0, 368(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: sd a0, 352(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and t5, a1, a0
+; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: sd a0, 360(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and s1, a1, s1
+; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and s3, a1, a0
+; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and s5, a1, a0
+; RV64I-NEXT: and a2, a1, a2
+; RV64I-NEXT: sd a2, 368(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: seqz a1, a1
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a1, a1, s6
+; RV64I-NEXT: ld a3, 448(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a1, a3
+; RV64I-NEXT: ld a0, 440(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 416(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: ld a2, 400(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a4, 392(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a2, a4
+; RV64I-NEXT: ld a4, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s6, 12
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s6, 13
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s6, 14
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: ld a5, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a5
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s6, 15
+; RV64I-NEXT: and a5, a5, a6
+; RV64I-NEXT: ld a6, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a6, a6
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: slli a7, s6, 16
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: sd a6, 416(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a6, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a6, a6
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: slli a7, s6, 17
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: sd a6, 424(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a6, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a6, a6
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: slli a7, s6, 18
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: sd a6, 432(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a6, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a6, a6
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: slli a7, s6, 19
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: ld a7, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a7, a7
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: slli t0, s6, 20
+; RV64I-NEXT: and a7, a7, t0
+; RV64I-NEXT: ld t0, 288(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t0, t0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: slli t1, s6, 21
+; RV64I-NEXT: and t0, t0, t1
+; RV64I-NEXT: sd t0, 288(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t0, 312(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t0, t0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: slli t1, s6, 22
+; RV64I-NEXT: and t0, t0, t1
+; RV64I-NEXT: sd t0, 312(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t0, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t0, t0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: slli t1, s6, 23
+; RV64I-NEXT: and t0, t0, t1
+; RV64I-NEXT: sd t0, 392(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t0, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t0, t0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: slli t1, s6, 24
+; RV64I-NEXT: and t0, t0, t1
+; RV64I-NEXT: sd t0, 400(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t0, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t0, t0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: slli t1, s6, 25
+; RV64I-NEXT: and t0, t0, t1
+; RV64I-NEXT: ld t1, 160(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: slli t2, s6, 26
+; RV64I-NEXT: and t1, t1, t2
+; RV64I-NEXT: ld t2, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t2, t2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: slli t3, s6, 27
+; RV64I-NEXT: and t2, t2, t3
+; RV64I-NEXT: sd t2, 200(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t2, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t2, t2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: slli t3, s6, 28
+; RV64I-NEXT: and t2, t2, t3
+; RV64I-NEXT: sd t2, 216(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t2, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t2, t2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: slli t3, s6, 29
+; RV64I-NEXT: and t2, t2, t3
+; RV64I-NEXT: sd t2, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t2, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t2, t2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: slli t3, s6, 30
+; RV64I-NEXT: and t2, t2, t3
+; RV64I-NEXT: sd t2, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a0, a3, a0
+; RV64I-NEXT: sd a0, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 456(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: sd a0, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, a2, a0
+; RV64I-NEXT: sd a0, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 11
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a4, a4, a5
+; RV64I-NEXT: sd a4, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a0, a6, a7
+; RV64I-NEXT: sd a0, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a0, t0, t1
+; RV64I-NEXT: sd a0, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a1, s0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 32
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 33
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a1, t6
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 34
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a1, s7
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 35
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a1, s10
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 36
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a1, ra
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 37
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 38
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 456(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 39
+; RV64I-NEXT: and s2, a1, a2
+; RV64I-NEXT: ld a0, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 40
+; RV64I-NEXT: and s7, a1, a2
+; RV64I-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 41
+; RV64I-NEXT: and s4, a1, a2
+; RV64I-NEXT: ld a0, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 42
+; RV64I-NEXT: and s9, a1, a2
+; RV64I-NEXT: ld a0, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 43
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 232(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli s0, s6, 44
+; RV64I-NEXT: and a1, a1, s0
+; RV64I-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 45
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 46
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 272(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a2, s6, 47
+; RV64I-NEXT: and t0, a1, a2
+; RV64I-NEXT: ld a0, 280(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: slli a3, s6, 48
+; RV64I-NEXT: and a7, a1, a3
+; RV64I-NEXT: ld a0, 296(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a3, a0
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: slli a4, s6, 49
+; RV64I-NEXT: and t1, a3, a4
+; RV64I-NEXT: ld a0, 304(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a0
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s6, 50
+; RV64I-NEXT: and t2, a4, a5
+; RV64I-NEXT: ld a0, 320(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a0
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s6, 51
+; RV64I-NEXT: and t3, a5, a6
+; RV64I-NEXT: ld a0, 328(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a6, a0
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: slli s8, s6, 52
+; RV64I-NEXT: and ra, a6, s8
+; RV64I-NEXT: ld a0, 336(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a6, a0
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: slli s8, s6, 53
+; RV64I-NEXT: and s8, a6, s8
+; RV64I-NEXT: ld a0, 344(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a6, a0
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: slli s10, s6, 54
+; RV64I-NEXT: and s10, a6, s10
+; RV64I-NEXT: seqz a6, t4
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: slli s11, s6, 55
+; RV64I-NEXT: and t4, a6, s11
+; RV64I-NEXT: ld a0, 352(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz s11, a0
+; RV64I-NEXT: addi s11, s11, -1
+; RV64I-NEXT: slli a0, s6, 56
+; RV64I-NEXT: and a6, s11, a0
+; RV64I-NEXT: seqz s11, t5
+; RV64I-NEXT: addi s11, s11, -1
+; RV64I-NEXT: slli t5, s6, 57
+; RV64I-NEXT: and t5, s11, t5
+; RV64I-NEXT: ld a0, 360(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz s11, a0
+; RV64I-NEXT: addi s11, s11, -1
+; RV64I-NEXT: slli t6, s6, 58
+; RV64I-NEXT: and t6, s11, t6
+; RV64I-NEXT: seqz s11, s1
+; RV64I-NEXT: addi s11, s11, -1
+; RV64I-NEXT: slli s1, s6, 59
+; RV64I-NEXT: and s1, s11, s1
+; RV64I-NEXT: seqz s11, s3
+; RV64I-NEXT: addi s11, s11, -1
+; RV64I-NEXT: slli s3, s6, 60
+; RV64I-NEXT: and s3, s11, s3
+; RV64I-NEXT: seqz s11, s5
+; RV64I-NEXT: addi s11, s11, -1
+; RV64I-NEXT: slli s5, s6, 61
+; RV64I-NEXT: and s5, s11, s5
+; RV64I-NEXT: slli s6, s6, 62
+; RV64I-NEXT: ld a0, 368(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz s11, a0
+; RV64I-NEXT: addi s11, s11, -1
+; RV64I-NEXT: and s6, s11, s6
+; RV64I-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s11, a0, a1
+; RV64I-NEXT: ld a0, 464(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 416(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a3, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 288(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a4, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a5, s0
+; RV64I-NEXT: xor s2, s2, s7
+; RV64I-NEXT: xor a7, t0, a7
+; RV64I-NEXT: xor a6, a6, t5
+; RV64I-NEXT: xor a0, s11, a0
+; RV64I-NEXT: ld t0, 448(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, t0
+; RV64I-NEXT: ld t0, 424(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a2, t0
+; RV64I-NEXT: ld t0, 312(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a3, t0
+; RV64I-NEXT: ld t0, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a4, t0
+; RV64I-NEXT: ld t0, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a5, t0
+; RV64I-NEXT: xor t0, s2, s4
+; RV64I-NEXT: xor a7, a7, t1
+; RV64I-NEXT: xor a6, a6, t6
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 392(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: xor a5, t0, s9
+; RV64I-NEXT: xor a7, a7, t2
+; RV64I-NEXT: xor a6, a6, s1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 160(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: xor a5, a7, t3
+; RV64I-NEXT: xor a6, a6, s3
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 232(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a4, a3
+; RV64I-NEXT: xor a3, a5, ra
+; RV64I-NEXT: xor a5, a6, s5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a4, a2
+; RV64I-NEXT: xor a3, a3, s8
+; RV64I-NEXT: xor a4, a5, s6
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: xor a2, a3, s10
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: xor a1, a2, t4
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a4, a1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 568(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 560(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 552(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 544(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 536(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 528(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 520(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 512(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 504(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 496(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 488(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 480(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 472(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 576
+; RV64I-NEXT: ret
+;
+; RV32IM-LABEL: clmul_i64:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: addi sp, sp, -272
+; RV32IM-NEXT: sw ra, 268(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s0, 264(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s1, 260(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s2, 256(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s3, 252(sp) # 4-byte Folded Spill
@@ -4846,120 +5532,104 @@ declare i8 @use(i8, i1)
define void @commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; RV32I-LABEL: commutative_clmul_i8:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, a0, s4
-; RV32I-NEXT: andi a1, s3, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s3, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s3, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s4, s5, a0
-; RV32I-NEXT: andi a1, s3, -128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: sb a0, 0(s1)
-; RV32I-NEXT: sb a0, 0(s0)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: slli a4, a1, 30
+; RV32I-NEXT: slli a5, a0, 1
+; RV32I-NEXT: slli a6, a1, 29
+; RV32I-NEXT: slli a7, a0, 2
+; RV32I-NEXT: slli t0, a1, 28
+; RV32I-NEXT: slli t1, a0, 3
+; RV32I-NEXT: slli t2, a1, 27
+; RV32I-NEXT: srli a4, a4, 31
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: slli a5, a0, 4
+; RV32I-NEXT: srli a6, a6, 31
+; RV32I-NEXT: neg a6, a6
+; RV32I-NEXT: and a6, a6, a7
+; RV32I-NEXT: slli a7, a1, 26
+; RV32I-NEXT: srli t0, t0, 31
+; RV32I-NEXT: neg t0, t0
+; RV32I-NEXT: and t0, t0, t1
+; RV32I-NEXT: slli t1, a0, 5
+; RV32I-NEXT: srli t2, t2, 31
+; RV32I-NEXT: neg t2, t2
+; RV32I-NEXT: and a5, t2, a5
+; RV32I-NEXT: slli t2, a1, 25
+; RV32I-NEXT: srli a7, a7, 31
+; RV32I-NEXT: neg a7, a7
+; RV32I-NEXT: and a7, a7, t1
+; RV32I-NEXT: slli t1, a0, 6
+; RV32I-NEXT: srli t2, t2, 31
+; RV32I-NEXT: neg t2, t2
+; RV32I-NEXT: and t1, t2, t1
+; RV32I-NEXT: slli t2, a1, 31
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: srai t2, t2, 31
+; RV32I-NEXT: and t2, t2, a0
+; RV32I-NEXT: slli a0, a0, 7
+; RV32I-NEXT: srli a1, a1, 31
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: xor a1, t2, a4
+; RV32I-NEXT: xor a4, a6, t0
+; RV32I-NEXT: xor a5, a5, a7
+; RV32I-NEXT: xor a1, a1, a4
+; RV32I-NEXT: xor a4, a5, t1
+; RV32I-NEXT: xor a1, a1, a4
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a0, 0(a3)
; RV32I-NEXT: ret
;
; RV64I-LABEL: commutative_clmul_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -64
-; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a3
-; RV64I-NEXT: mv s1, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, a0, s4
-; RV64I-NEXT: andi a1, s3, 4
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: andi a1, s3, 16
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 32
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: andi a1, s3, 64
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s4, s5, a0
-; RV64I-NEXT: andi a1, s3, -128
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: sb a0, 0(s1)
-; RV64I-NEXT: sb a0, 0(s0)
-; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: slli a4, a1, 62
+; RV64I-NEXT: slli a5, a0, 1
+; RV64I-NEXT: slli a6, a1, 61
+; RV64I-NEXT: slli a7, a0, 2
+; RV64I-NEXT: slli t0, a1, 60
+; RV64I-NEXT: slli t1, a0, 3
+; RV64I-NEXT: slli t2, a1, 59
+; RV64I-NEXT: srli a4, a4, 63
+; RV64I-NEXT: neg a4, a4
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: slli a5, a0, 4
+; RV64I-NEXT: srli a6, a6, 63
+; RV64I-NEXT: neg a6, a6
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: slli a7, a1, 58
+; RV64I-NEXT: srli t0, t0, 63
+; RV64I-NEXT: neg t0, t0
+; RV64I-NEXT: and t0, t0, t1
+; RV64I-NEXT: slli t1, a0, 5
+; RV64I-NEXT: srli t2, t2, 63
+; RV64I-NEXT: neg t2, t2
+; RV64I-NEXT: and a5, t2, a5
+; RV64I-NEXT: slli t2, a1, 57
+; RV64I-NEXT: srli a7, a7, 63
+; RV64I-NEXT: neg a7, a7
+; RV64I-NEXT: and a7, a7, t1
+; RV64I-NEXT: slli t1, a0, 6
+; RV64I-NEXT: srli t2, t2, 63
+; RV64I-NEXT: neg t2, t2
+; RV64I-NEXT: and t1, t2, t1
+; RV64I-NEXT: slli t2, a1, 63
+; RV64I-NEXT: slli a1, a1, 56
+; RV64I-NEXT: srai t2, t2, 63
+; RV64I-NEXT: and t2, t2, a0
+; RV64I-NEXT: slli a0, a0, 7
+; RV64I-NEXT: srli a1, a1, 63
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: xor a1, t2, a4
+; RV64I-NEXT: xor a4, a6, t0
+; RV64I-NEXT: xor a5, a5, a7
+; RV64I-NEXT: xor a1, a1, a4
+; RV64I-NEXT: xor a4, a5, t1
+; RV64I-NEXT: xor a1, a1, a4
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a0, 0(a3)
; RV64I-NEXT: ret
;
; CHECK-M-LABEL: commutative_clmul_i8:
@@ -5029,124 +5699,126 @@ define void @commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; RV32I-LABEL: mul_use_commutative_clmul_i8:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, a0, s4
-; RV32I-NEXT: andi a1, s3, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s3, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s3, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s4, s5, a0
-; RV32I-NEXT: andi a1, s3, -128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s4, a0
-; RV32I-NEXT: sb s2, 0(s1)
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: slli a3, a1, 30
+; RV32I-NEXT: slli a4, a0, 1
+; RV32I-NEXT: slli a5, a1, 29
+; RV32I-NEXT: slli a6, a0, 2
+; RV32I-NEXT: slli a7, a1, 28
+; RV32I-NEXT: slli t0, a0, 3
+; RV32I-NEXT: slli t1, a1, 27
+; RV32I-NEXT: srli a3, a3, 31
+; RV32I-NEXT: neg a3, a3
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: slli a4, a0, 4
+; RV32I-NEXT: srli a5, a5, 31
+; RV32I-NEXT: neg a5, a5
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: slli a6, a1, 26
+; RV32I-NEXT: srli a7, a7, 31
+; RV32I-NEXT: neg a7, a7
+; RV32I-NEXT: and a7, a7, t0
+; RV32I-NEXT: slli t0, a0, 5
+; RV32I-NEXT: srli t1, t1, 31
+; RV32I-NEXT: neg t1, t1
+; RV32I-NEXT: and a4, t1, a4
+; RV32I-NEXT: slli t1, a1, 25
+; RV32I-NEXT: srli a6, a6, 31
+; RV32I-NEXT: neg a6, a6
+; RV32I-NEXT: and a6, a6, t0
+; RV32I-NEXT: slli t0, a0, 6
+; RV32I-NEXT: srli t1, t1, 31
+; RV32I-NEXT: neg t1, t1
+; RV32I-NEXT: and t0, t1, t0
+; RV32I-NEXT: slli t1, a1, 31
+; RV32I-NEXT: srai t1, t1, 31
+; RV32I-NEXT: and t1, t1, a0
+; RV32I-NEXT: xor a3, t1, a3
+; RV32I-NEXT: xor a5, a5, a7
+; RV32I-NEXT: xor a4, a4, a6
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: slli a0, a0, 7
+; RV32I-NEXT: srli a1, a1, 31
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: xor a3, a3, a5
+; RV32I-NEXT: xor a4, a4, t0
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: xor s1, a3, a0
+; RV32I-NEXT: sb s1, 0(a2)
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call use
-; RV32I-NEXT: sb s2, 0(s0)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: sb s1, 0(s0)
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: mul_use_commutative_clmul_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -64
-; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a3
-; RV64I-NEXT: mv s1, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, a0, s4
-; RV64I-NEXT: andi a1, s3, 4
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: andi a1, s3, 16
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 32
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: andi a1, s3, 64
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s4, s5, a0
-; RV64I-NEXT: andi a1, s3, -128
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s4, a0
-; RV64I-NEXT: sb s2, 0(s1)
-; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: slli a3, a1, 62
+; RV64I-NEXT: slli a4, a0, 1
+; RV64I-NEXT: slli a5, a1, 61
+; RV64I-NEXT: slli a6, a0, 2
+; RV64I-NEXT: slli a7, a1, 60
+; RV64I-NEXT: slli t0, a0, 3
+; RV64I-NEXT: slli t1, a1, 59
+; RV64I-NEXT: srli a3, a3, 63
+; RV64I-NEXT: neg a3, a3
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: srli a5, a5, 63
+; RV64I-NEXT: neg a5, a5
+; RV64I-NEXT: and a5, a5, a6
+; RV64I-NEXT: slli a6, a1, 58
+; RV64I-NEXT: srli a7, a7, 63
+; RV64I-NEXT: neg a7, a7
+; RV64I-NEXT: and a7, a7, t0
+; RV64I-NEXT: slli t0, a0, 5
+; RV64I-NEXT: srli t1, t1, 63
+; RV64I-NEXT: neg t1, t1
+; RV64I-NEXT: and a4, t1, a4
+; RV64I-NEXT: slli t1, a1, 57
+; RV64I-NEXT: srli a6, a6, 63
+; RV64I-NEXT: neg a6, a6
+; RV64I-NEXT: and a6, a6, t0
+; RV64I-NEXT: slli t0, a0, 6
+; RV64I-NEXT: srli t1, t1, 63
+; RV64I-NEXT: neg t1, t1
+; RV64I-NEXT: and t0, t1, t0
+; RV64I-NEXT: slli t1, a1, 63
+; RV64I-NEXT: srai t1, t1, 63
+; RV64I-NEXT: and t1, t1, a0
+; RV64I-NEXT: xor a3, t1, a3
+; RV64I-NEXT: xor a5, a5, a7
+; RV64I-NEXT: xor a4, a4, a6
+; RV64I-NEXT: slli a1, a1, 56
+; RV64I-NEXT: slli a0, a0, 7
+; RV64I-NEXT: srli a1, a1, 63
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: xor a3, a3, a5
+; RV64I-NEXT: xor a4, a4, t0
+; RV64I-NEXT: xor a3, a3, a4
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: xor s1, a3, a0
+; RV64I-NEXT: sb s1, 0(a2)
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call use
-; RV64I-NEXT: sb s2, 0(s0)
-; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: sb s1, 0(s0)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32IM-LABEL: mul_use_commutative_clmul_i8:
@@ -5319,120 +5991,104 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
define void @neg_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; RV32I-LABEL: neg_commutative_clmul_i8:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, a0, s4
-; RV32I-NEXT: andi a1, s3, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s3, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s3, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s4, s5, a0
-; RV32I-NEXT: andi a1, s3, -128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: sb a0, 0(s1)
-; RV32I-NEXT: sb a0, 0(s0)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: slli a4, a1, 30
+; RV32I-NEXT: slli a5, a0, 1
+; RV32I-NEXT: slli a6, a1, 29
+; RV32I-NEXT: slli a7, a0, 2
+; RV32I-NEXT: slli t0, a1, 28
+; RV32I-NEXT: slli t1, a0, 3
+; RV32I-NEXT: slli t2, a1, 27
+; RV32I-NEXT: srli a4, a4, 31
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: slli a5, a0, 4
+; RV32I-NEXT: srli a6, a6, 31
+; RV32I-NEXT: neg a6, a6
+; RV32I-NEXT: and a6, a6, a7
+; RV32I-NEXT: slli a7, a1, 26
+; RV32I-NEXT: srli t0, t0, 31
+; RV32I-NEXT: neg t0, t0
+; RV32I-NEXT: and t0, t0, t1
+; RV32I-NEXT: slli t1, a0, 5
+; RV32I-NEXT: srli t2, t2, 31
+; RV32I-NEXT: neg t2, t2
+; RV32I-NEXT: and a5, t2, a5
+; RV32I-NEXT: slli t2, a1, 25
+; RV32I-NEXT: srli a7, a7, 31
+; RV32I-NEXT: neg a7, a7
+; RV32I-NEXT: and a7, a7, t1
+; RV32I-NEXT: slli t1, a0, 6
+; RV32I-NEXT: srli t2, t2, 31
+; RV32I-NEXT: neg t2, t2
+; RV32I-NEXT: and t1, t2, t1
+; RV32I-NEXT: slli t2, a1, 31
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: srai t2, t2, 31
+; RV32I-NEXT: and t2, t2, a0
+; RV32I-NEXT: slli a0, a0, 7
+; RV32I-NEXT: srli a1, a1, 31
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: xor a1, t2, a4
+; RV32I-NEXT: xor a4, a6, t0
+; RV32I-NEXT: xor a5, a5, a7
+; RV32I-NEXT: xor a1, a1, a4
+; RV32I-NEXT: xor a4, a5, t1
+; RV32I-NEXT: xor a1, a1, a4
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a0, 0(a3)
; RV32I-NEXT: ret
;
; RV64I-LABEL: neg_commutative_clmul_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -64
-; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a3
-; RV64I-NEXT: mv s1, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, a0, s4
-; RV64I-NEXT: andi a1, s3, 4
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: andi a1, s3, 16
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 32
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: andi a1, s3, 64
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s4, s5, a0
-; RV64I-NEXT: andi a1, s3, -128
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: sb a0, 0(s1)
-; RV64I-NEXT: sb a0, 0(s0)
-; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: slli a4, a1, 62
+; RV64I-NEXT: slli a5, a0, 1
+; RV64I-NEXT: slli a6, a1, 61
+; RV64I-NEXT: slli a7, a0, 2
+; RV64I-NEXT: slli t0, a1, 60
+; RV64I-NEXT: slli t1, a0, 3
+; RV64I-NEXT: slli t2, a1, 59
+; RV64I-NEXT: srli a4, a4, 63
+; RV64I-NEXT: neg a4, a4
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: slli a5, a0, 4
+; RV64I-NEXT: srli a6, a6, 63
+; RV64I-NEXT: neg a6, a6
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: slli a7, a1, 58
+; RV64I-NEXT: srli t0, t0, 63
+; RV64I-NEXT: neg t0, t0
+; RV64I-NEXT: and t0, t0, t1
+; RV64I-NEXT: slli t1, a0, 5
+; RV64I-NEXT: srli t2, t2, 63
+; RV64I-NEXT: neg t2, t2
+; RV64I-NEXT: and a5, t2, a5
+; RV64I-NEXT: slli t2, a1, 57
+; RV64I-NEXT: srli a7, a7, 63
+; RV64I-NEXT: neg a7, a7
+; RV64I-NEXT: and a7, a7, t1
+; RV64I-NEXT: slli t1, a0, 6
+; RV64I-NEXT: srli t2, t2, 63
+; RV64I-NEXT: neg t2, t2
+; RV64I-NEXT: and t1, t2, t1
+; RV64I-NEXT: slli t2, a1, 63
+; RV64I-NEXT: slli a1, a1, 56
+; RV64I-NEXT: srai t2, t2, 63
+; RV64I-NEXT: and t2, t2, a0
+; RV64I-NEXT: slli a0, a0, 7
+; RV64I-NEXT: srli a1, a1, 63
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: xor a1, t2, a4
+; RV64I-NEXT: xor a4, a6, t0
+; RV64I-NEXT: xor a5, a5, a7
+; RV64I-NEXT: xor a1, a1, a4
+; RV64I-NEXT: xor a4, a5, t1
+; RV64I-NEXT: xor a1, a1, a4
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a0, 0(a3)
; RV64I-NEXT: ret
;
; CHECK-M-LABEL: neg_commutative_clmul_i8:
@@ -5503,2208 +6159,3451 @@ declare void @vector_use(<2 x i64>)
define void @commutative_clmul_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %p1) nounwind {
; RV32I-LABEL: commutative_clmul_v2i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -336
-; RV32I-NEXT: sw ra, 332(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 328(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 324(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 320(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 316(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 312(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 308(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 304(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 300(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 296(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s9, 292(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s10, 288(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s11, 284(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a3, 264(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a2, 260(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s9, 0(a1)
-; RV32I-NEXT: lw s11, 4(a1)
-; RV32I-NEXT: lw s8, 8(a1)
-; RV32I-NEXT: lw s7, 12(a1)
-; RV32I-NEXT: lw s3, 0(a0)
-; RV32I-NEXT: lw s0, 4(a0)
-; RV32I-NEXT: lw s2, 8(a0)
-; RV32I-NEXT: lw s4, 12(a0)
+; RV32I-NEXT: addi sp, sp, -784
+; RV32I-NEXT: sw ra, 780(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 776(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 772(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 768(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 764(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 760(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 756(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 752(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 748(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s8, 744(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s9, 740(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s10, 736(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s11, 732(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a3, 728(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a2, 724(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t3, 0(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw s6, 12(a1)
+; RV32I-NEXT: lw t1, 0(a0)
+; RV32I-NEXT: lw t2, 4(a0)
+; RV32I-NEXT: lw s4, 8(a0)
+; RV32I-NEXT: lw s5, 12(a0)
+; RV32I-NEXT: lui s9, 16
+; RV32I-NEXT: addi s9, s9, -256
+; RV32I-NEXT: srli t4, t1, 8
+; RV32I-NEXT: srli s0, t1, 24
+; RV32I-NEXT: and a0, t1, s9
+; RV32I-NEXT: slli a1, t1, 24
+; RV32I-NEXT: srli s2, t3, 8
+; RV32I-NEXT: srli s3, t3, 24
+; RV32I-NEXT: and t6, t3, s9
+; RV32I-NEXT: slli s1, t3, 24
+; RV32I-NEXT: slli a6, t2, 1
+; RV32I-NEXT: andi a5, t3, 2
+; RV32I-NEXT: slli t5, t2, 2
+; RV32I-NEXT: andi a2, t3, 4
+; RV32I-NEXT: slli s7, t2, 3
+; RV32I-NEXT: andi a4, t3, 8
+; RV32I-NEXT: slli a3, t1, 1
+; RV32I-NEXT: srli s8, s4, 8
+; RV32I-NEXT: and t4, t4, s9
+; RV32I-NEXT: or t4, t4, s0
+; RV32I-NEXT: srli s0, s4, 24
+; RV32I-NEXT: and s2, s2, s9
+; RV32I-NEXT: or s2, s2, s3
+; RV32I-NEXT: srli s3, a7, 8
+; RV32I-NEXT: slli t6, t6, 8
+; RV32I-NEXT: or t6, s1, t6
+; RV32I-NEXT: srli s1, a7, 24
+; RV32I-NEXT: and s8, s8, s9
+; RV32I-NEXT: or s0, s8, s0
+; RV32I-NEXT: and s8, a7, s9
+; RV32I-NEXT: and s3, s3, s9
+; RV32I-NEXT: sw s9, 720(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or s1, s3, s1
+; RV32I-NEXT: slli s3, a7, 24
+; RV32I-NEXT: slli s8, s8, 8
+; RV32I-NEXT: or s3, s3, s8
+; RV32I-NEXT: andi s8, t0, 2
+; RV32I-NEXT: slli a0, a0, 8
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: or a0, a0, t4
+; RV32I-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s4, s9
+; RV32I-NEXT: or t4, t6, s2
+; RV32I-NEXT: sw t4, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s9, s4, 24
+; RV32I-NEXT: slli a0, a0, 8
+; RV32I-NEXT: or a0, s9, a0
+; RV32I-NEXT: or a0, a0, s0
+; RV32I-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a0, t1, 2
+; RV32I-NEXT: or t4, s3, s1
+; RV32I-NEXT: sw t4, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi t4, t0, 4
+; RV32I-NEXT: seqz s0, a5
+; RV32I-NEXT: seqz t6, s8
+; RV32I-NEXT: addi s0, s0, -1
+; RV32I-NEXT: addi t6, t6, -1
+; RV32I-NEXT: and s2, s0, a6
+; RV32I-NEXT: and a5, t6, a3
+; RV32I-NEXT: and t6, s0, a3
+; RV32I-NEXT: slli a3, t1, 3
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: seqz t4, t4
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: addi t4, t4, -1
+; RV32I-NEXT: and s3, a2, t5
+; RV32I-NEXT: and s0, t4, a0
+; RV32I-NEXT: and t4, a2, a0
+; RV32I-NEXT: andi a0, t0, 8
+; RV32I-NEXT: seqz a2, a4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and s10, a2, s7
+; RV32I-NEXT: and s7, a0, a3
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 16
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, t0, 16
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 4
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 4
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and s8, a0, a3
+; RV32I-NEXT: andi a0, t3, 32
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, t0, 32
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 5
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 5
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 64
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, t0, 64
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 6
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 6
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 128
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, t0, 128
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 7
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 7
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 256
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, t0, 256
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 8
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 8
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 512
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, t0, 512
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 9
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 9
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 1024
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, t0, 1024
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 10
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 10
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: slli a4, a0, 11
+; RV32I-NEXT: and a0, t3, a4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a4
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 11
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 11
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 1
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 12
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 12
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 528(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 2
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 13
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 13
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 4
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 14
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 14
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 8
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 15
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 652(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 15
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a2, 16
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: lui t0, 349525
-; RV32I-NEXT: addi a7, a2, -256
-; RV32I-NEXT: sw a7, 276(sp) # 4-byte Folded Spill
-; RV32I-NEXT: addi a6, a0, -241
-; RV32I-NEXT: addi a5, a1, 819
-; RV32I-NEXT: srli a0, s3, 8
-; RV32I-NEXT: srli a1, s3, 24
-; RV32I-NEXT: and a2, s3, a7
-; RV32I-NEXT: slli a3, s3, 24
-; RV32I-NEXT: srli a4, s9, 8
-; RV32I-NEXT: and a0, a0, a7
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, s9, 24
-; RV32I-NEXT: slli a2, a2, 8
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: and a3, s9, a7
-; RV32I-NEXT: and a4, a4, a7
-; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: slli a4, s9, 24
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: addi a4, t0, 1365
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 4
-; RV32I-NEXT: sw a6, 268(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a0, a0, a6
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, a6
-; RV32I-NEXT: and a2, a2, a6
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: and a3, a3, a6
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 2
-; RV32I-NEXT: sw a5, 272(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a0, a0, a5
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, a5
-; RV32I-NEXT: and a2, a2, a5
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, a5
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 1
-; RV32I-NEXT: sw a4, 280(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a0, a0, a4
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, a4
-; RV32I-NEXT: and a2, a2, a4
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, a4
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: or s1, a2, a0
-; RV32I-NEXT: or s6, a3, a1
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 16
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 456(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 16
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 460(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 32
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 17
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 444(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 17
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 452(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 64
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 18
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 484(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 18
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 496(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 128
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 19
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 532(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 19
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 256
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 20
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 560(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 20
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 564(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 512
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 21
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 576(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 21
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 580(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 1024
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 22
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 408(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 22
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 412(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 2048
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 23
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 400(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t1, 23
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 404(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 4096
+; RV32I-NEXT: and a0, t3, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, t0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 24
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 420(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a2, a2, a1
+; RV32I-NEXT: sw a2, 428(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 8192
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, t0, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t2, 25
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 440(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t1, 25
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 448(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 16384
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, t0, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t2, 26
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 468(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t1, 26
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 472(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 32768
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, t0, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t2, 27
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 480(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t1, 27
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 488(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 65536
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, t0, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t2, 28
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 512(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t1, 28
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 508(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 131072
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, t0, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t2, 29
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 372(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t1, 29
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 376(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 262144
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, t0, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t2, 30
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 364(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t1, 30
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 368(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 1
+; RV32I-NEXT: srli a1, t3, 31
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, t0, 1
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 31
+; RV32I-NEXT: and a6, a0, t2
+; RV32I-NEXT: sw a6, 348(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and t3, a2, t1
+; RV32I-NEXT: and t2, a0, t1
+; RV32I-NEXT: slli t1, t1, 31
+; RV32I-NEXT: srli a0, t0, 31
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a3, a1, a3
+; RV32I-NEXT: sw a3, 380(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, t1
+; RV32I-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a1, t1
+; RV32I-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 2
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s6, 2
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: andi a1, s6, 1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s10, a0, s5
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 1
+; RV32I-NEXT: and t1, a0, a2
+; RV32I-NEXT: slli a2, s4, 1
+; RV32I-NEXT: and t0, a1, a2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 4
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s6, 4
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 2
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 320(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 2
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 328(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 8
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s6, 8
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: xor s10, s10, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 3
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 304(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 3
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 16
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s6, 16
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 4
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 276(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 4
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 292(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 32
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s6, 32
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 5
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 256(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 5
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 64
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s6, 64
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: xor s10, s10, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 6
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 316(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 6
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 324(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 128
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s6, 128
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 7
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 232(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 7
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 244(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 256
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s6, 256
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 8
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 216(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 8
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 512
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s6, 512
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 9
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 252(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 9
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 1024
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s6, 1024
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 10
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 336(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 10
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 340(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a4, 436(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a7, a4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a4
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 11
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 164(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 11
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 168(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a0, a2
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: and a1, s6, a1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: lui a1, 1024
-; RV32I-NEXT: xor s10, s10, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 12
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 144(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 12
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 160(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 172(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 2
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: and a1, s6, a1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: lui a1, 131072
-; RV32I-NEXT: xor s10, s10, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 13
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 188(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 13
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 224(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 4
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: and a1, s6, a1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: lui a4, 349525
-; RV32I-NEXT: addi a4, a4, 1364
-; RV32I-NEXT: sw a4, 124(sp) # 4-byte Folded Spill
-; RV32I-NEXT: xor a0, s10, a0
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: lw a3, 276(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: slli a2, a0, 24
-; RV32I-NEXT: and a0, a0, a3
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: lw a2, 268(sp) # 4-byte Folded Reload
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 14
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 248(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 14
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 8
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 15
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 280(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 15
; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: lw a2, 272(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a1, 288(sp) # 4-byte Folded Spill
; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 16
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 84(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 16
; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lw a2, 280(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: and a1, a1, a4
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
-; RV32I-NEXT: andi a1, s9, 2
-; RV32I-NEXT: sw a1, 256(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s9, 1
-; RV32I-NEXT: sw a1, 252(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s6, a0, s1
-; RV32I-NEXT: andi a1, s9, 4
-; RV32I-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s9, 8
-; RV32I-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: andi a1, s9, 16
-; RV32I-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s9, 32
-; RV32I-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s9, 64
-; RV32I-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: andi a1, s9, 128
-; RV32I-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s9, 256
-; RV32I-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s9, 512
-; RV32I-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s9, 1024
-; RV32I-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: lw s5, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, s9, s5
-; RV32I-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 204(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 196(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: lui s10, 16
-; RV32I-NEXT: and a1, s9, s10
-; RV32I-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 188(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s9, a0
+; RV32I-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 32
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 17
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 17
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 64
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 18
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 120(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 18
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 140(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 128
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 19
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 176(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 19
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sw a1, 180(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 176(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 172(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 168(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 164(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 160(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 156(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 152(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s9, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 256
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 20
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 192(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 20
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 196(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 204(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 512
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 21
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 208(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 21
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 220(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 1024
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 22
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 22
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 2048
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 23
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 23
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 4096
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 24
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, s9
+; RV32I-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, s9
+; RV32I-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 8192
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 25
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 80(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 25
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 16384
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 26
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 104(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 26
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 32768
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 27
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 124(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 27
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 136(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 65536
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s5, 28
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 152(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s4, 28
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sw a1, 148(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 156(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 131072
+; RV32I-NEXT: and a0, a7, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a2, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a0, s5, 29
+; RV32I-NEXT: and s11, a2, a0
+; RV32I-NEXT: slli a3, s4, 29
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: and a2, a7, a0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: and s1, s6, a0
+; RV32I-NEXT: seqz s1, s1
+; RV32I-NEXT: addi a0, a2, -1
+; RV32I-NEXT: addi s1, s1, -1
+; RV32I-NEXT: slli a6, s5, 30
+; RV32I-NEXT: and s9, a0, a6
+; RV32I-NEXT: slli a2, s4, 30
+; RV32I-NEXT: and a3, s1, a2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a7, 1
+; RV32I-NEXT: srli a7, a7, 31
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi ra, s6, 1
+; RV32I-NEXT: seqz ra, ra
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi ra, ra, -1
+; RV32I-NEXT: slli t5, s5, 31
+; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and s5, ra, s4
+; RV32I-NEXT: and a6, a0, s4
+; RV32I-NEXT: slli s4, s4, 31
+; RV32I-NEXT: srli a0, s6, 31
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi s6, a7, -1
+; RV32I-NEXT: addi a7, a0, -1
+; RV32I-NEXT: and a0, s6, t5
+; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a7, s4
+; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s6, s4
+; RV32I-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, s2
+; RV32I-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, s3, s10
+; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 444(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 400(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 364(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, t3, a5
+; RV32I-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, s0, s7
+; RV32I-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 528(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 452(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 404(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 368(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, a1, t1
+; RV32I-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 320(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 256(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, a0, a1
+; RV32I-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, a0, a1
+; RV32I-NEXT: xor a0, s11, s9
+; RV32I-NEXT: sw a0, 320(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor s11, s5, t0
+; RV32I-NEXT: lw a0, 328(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 308(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, a0, a1
+; RV32I-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 268(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, a0, a1
+; RV32I-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 228(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, a0, s0
+; RV32I-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 160(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, a0, s1
+; RV32I-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, a0, a1
+; RV32I-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, a0, a1
+; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, a0, a3
+; RV32I-NEXT: xor s6, t2, t6
+; RV32I-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t4, a0
+; RV32I-NEXT: lw a0, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, s8, a0
+; RV32I-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a0, a1
+; RV32I-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, a0, a1
+; RV32I-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, a0, a1
+; RV32I-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 416(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, a0, a1
+; RV32I-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, a0, a1
+; RV32I-NEXT: lw a0, 360(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a6, a0
+; RV32I-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 312(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 296(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw a2, 244(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 240(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: lw a3, 172(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a4, a3
+; RV32I-NEXT: lw a4, 112(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 96(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a5, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a6, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a6
+; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, s8
+; RV32I-NEXT: lw s8, 348(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s8, s9
+; RV32I-NEXT: sw s8, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 528(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 484(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 456(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 500(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 420(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 444(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 484(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 380(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 408(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 400(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s8, s9
+; RV32I-NEXT: sw s8, 460(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 456(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 452(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 444(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 496(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 424(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 428(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 412(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 428(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 388(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 404(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 376(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s8, s9
+; RV32I-NEXT: sw s8, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 316(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, ra, s8
+; RV32I-NEXT: sw s8, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 252(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 372(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 188(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 368(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 120(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 364(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, s10, s8
+; RV32I-NEXT: lw s8, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 320(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor s8, s11, s5
+; RV32I-NEXT: lw s5, 324(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, s7, s5
+; RV32I-NEXT: lw s5, 264(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s0, s5
+; RV32I-NEXT: sw s0, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 200(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s1, s0
+; RV32I-NEXT: sw s0, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 132(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s2, s0
+; RV32I-NEXT: sw s0, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s3, s0
+; RV32I-NEXT: sw s0, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s4, s0
+; RV32I-NEXT: sw s0, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor t0, s6, t0
+; RV32I-NEXT: sw t0, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t1, t0
+; RV32I-NEXT: sw t0, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t2, t0
+; RV32I-NEXT: sw t0, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t3, t0
+; RV32I-NEXT: sw t0, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 516(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t4, t0
+; RV32I-NEXT: sw t0, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 432(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t5, t0
+; RV32I-NEXT: sw t0, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 396(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t6, t0
+; RV32I-NEXT: sw t0, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, a7, a0
+; RV32I-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 344(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a0
+; RV32I-NEXT: lw a0, 224(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a0
+; RV32I-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a4, a0
+; RV32I-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a5, a0
+; RV32I-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a6, a0
+; RV32I-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a4, 61681
+; RV32I-NEXT: addi a4, a4, -241
+; RV32I-NEXT: sw a4, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, a0, 4
+; RV32I-NEXT: and t0, a0, a4
+; RV32I-NEXT: and a6, a6, a4
+; RV32I-NEXT: slli t0, t0, 4
+; RV32I-NEXT: or a0, a6, t0
+; RV32I-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, a0, 4
+; RV32I-NEXT: and t1, a0, a4
+; RV32I-NEXT: and a6, a6, a4
+; RV32I-NEXT: slli t1, t1, 4
+; RV32I-NEXT: or a0, a6, t1
+; RV32I-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, a1, a0
+; RV32I-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, a1, a0
+; RV32I-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, a1, a0
+; RV32I-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 444(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli t2, a0, 4
+; RV32I-NEXT: and t3, a0, a4
+; RV32I-NEXT: and t2, t2, a4
+; RV32I-NEXT: slli t3, t3, 4
+; RV32I-NEXT: or a0, t2, t3
+; RV32I-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli t3, a0, 4
+; RV32I-NEXT: and a0, a0, a4
+; RV32I-NEXT: and t3, t3, a4
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: or a0, t3, a0
+; RV32I-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, a1, a0
+; RV32I-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 632(sp) # 4-byte Folded Reload
; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s1, s6, a0
-; RV32I-NEXT: andi a1, s11, 2
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s11, 1
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s6, a0, s0
-; RV32I-NEXT: andi a1, s11, 4
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s11, 8
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: andi a1, s11, 16
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s11, 32
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s11, 64
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: andi a1, s11, 128
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s11, 256
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s11, 512
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s11, 1024
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: and a1, s11, s5
-; RV32I-NEXT: mv s9, s5
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: and a1, s11, s10
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, a1, a0
+; RV32I-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s10, a0
+; RV32I-NEXT: xor s4, s8, s9
+; RV32I-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 624(sp) # 4-byte Folded Reload
; RV32I-NEXT: xor s0, s0, a0
+; RV32I-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, a1, a0
+; RV32I-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, a1, a0
+; RV32I-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, a1, a0
+; RV32I-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, a0, a1
+; RV32I-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a1, a0
+; RV32I-NEXT: lw a0, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a1, a0
+; RV32I-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a1, a0
+; RV32I-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a1, a0
+; RV32I-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a0, a1
+; RV32I-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a2, a0
+; RV32I-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a3, a0
+; RV32I-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 496(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a0
+; RV32I-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 476(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a0
+; RV32I-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, a0, s11
+; RV32I-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, ra, a0
+; RV32I-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, s8, a1
+; RV32I-NEXT: lw s8, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: lw s8, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: lw s9, 564(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, s10, s9
+; RV32I-NEXT: lw s10, 472(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, s6, s10
+; RV32I-NEXT: lw s6, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, s6, s5
+; RV32I-NEXT: lw s6, 280(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, s1, s6
+; RV32I-NEXT: lw s6, 192(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s2, s6
+; RV32I-NEXT: lw s6, 104(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s3, s6
+; RV32I-NEXT: xor s0, s4, s0
+; RV32I-NEXT: lw s4, 288(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, s4
+; RV32I-NEXT: lw s4, 196(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, s4
+; RV32I-NEXT: lw s4, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, s4
+; RV32I-NEXT: xor t2, t6, t2
+; RV32I-NEXT: lw t6, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, t6
+; RV32I-NEXT: lw t6, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, t6
+; RV32I-NEXT: lw t6, 492(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, t6
+; RV32I-NEXT: xor a4, t1, a4
+; RV32I-NEXT: lw t1, 300(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t1
+; RV32I-NEXT: lw t1, 204(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, t1
+; RV32I-NEXT: lw t1, 116(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, t1
+; RV32I-NEXT: xor t1, s11, ra
+; RV32I-NEXT: lw t6, 576(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, t6
+; RV32I-NEXT: lw t6, 480(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, t6
+; RV32I-NEXT: xor t6, s7, s8
+; RV32I-NEXT: lw s4, 580(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, s9, s4
+; RV32I-NEXT: lw s6, 488(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s10, s6
+; RV32I-NEXT: xor s1, s5, s1
+; RV32I-NEXT: lw s5, 208(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s2, s5
+; RV32I-NEXT: lw s5, 124(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s3, s5
+; RV32I-NEXT: xor t3, s0, t3
+; RV32I-NEXT: lw s0, 212(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, s0
+; RV32I-NEXT: lw s0, 128(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, s0
+; RV32I-NEXT: xor a6, t2, a6
+; RV32I-NEXT: lw t2, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, t2
+; RV32I-NEXT: lw t2, 504(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, t2
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a5, 220(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a2, a5
+; RV32I-NEXT: lw a2, 136(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a2
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi s7, a2, 819
+; RV32I-NEXT: lw a2, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli t2, a2, 2
+; RV32I-NEXT: and s0, a2, s7
+; RV32I-NEXT: and t2, t2, s7
+; RV32I-NEXT: slli s0, s0, 2
+; RV32I-NEXT: or t2, t2, s0
+; RV32I-NEXT: lw a2, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli s0, a2, 2
+; RV32I-NEXT: and s5, a2, s7
+; RV32I-NEXT: and s0, s0, s7
+; RV32I-NEXT: slli s5, s5, 2
+; RV32I-NEXT: or s0, s0, s5
+; RV32I-NEXT: xor a0, t1, a0
+; RV32I-NEXT: lw a2, 512(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor t1, t6, s4
+; RV32I-NEXT: lw a2, 508(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, s6, a2
+; RV32I-NEXT: lw a2, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli s4, a2, 2
+; RV32I-NEXT: and s5, a2, s7
+; RV32I-NEXT: and s4, s4, s7
+; RV32I-NEXT: slli s5, s5, 2
+; RV32I-NEXT: or s4, s4, s5
+; RV32I-NEXT: lw a2, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli s5, a2, 2
+; RV32I-NEXT: and s6, a2, s7
+; RV32I-NEXT: and s5, s5, s7
+; RV32I-NEXT: slli s6, s6, 2
+; RV32I-NEXT: or s5, s5, s6
+; RV32I-NEXT: xor s1, s1, s2
+; RV32I-NEXT: lw a2, 152(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s3, a2
+; RV32I-NEXT: xor t3, t3, t4
+; RV32I-NEXT: lw a2, 148(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t5, a2
+; RV32I-NEXT: xor a6, a6, a7
+; RV32I-NEXT: lw a2, 524(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, t0, a2
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a2, 156(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a2
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a1, t1, t6
+; RV32I-NEXT: xor a5, s1, s2
+; RV32I-NEXT: xor t0, t3, t4
+; RV32I-NEXT: xor a6, a6, a7
+; RV32I-NEXT: xor a3, a4, a3
+; RV32I-NEXT: lw a2, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: lw a4, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a1, a4
+; RV32I-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a1
+; RV32I-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, t0, a1
+; RV32I-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a6, a1
+; RV32I-NEXT: sw a1, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a1, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a3, a1
+; RV32I-NEXT: sw a1, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s2, 349525
+; RV32I-NEXT: addi s2, s2, 1365
+; RV32I-NEXT: srli a1, t2, 1
+; RV32I-NEXT: and a3, t2, s2
+; RV32I-NEXT: and a1, a1, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: xor a0, a4, a0
+; RV32I-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a0, s4, 1
+; RV32I-NEXT: and a3, s4, s2
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or a2, a0, a3
+; RV32I-NEXT: xor a0, a7, a5
+; RV32I-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a0, s0, 1
+; RV32I-NEXT: and a3, s0, s2
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: srli a3, a3, 31
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 31
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 700(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a3, s5, 1
+; RV32I-NEXT: and a4, s5, s2
+; RV32I-NEXT: and a3, a3, s2
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: or t3, a3, a4
+; RV32I-NEXT: srli a4, a4, 31
+; RV32I-NEXT: seqz a3, a4
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a2, 31
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 2
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 1
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 4
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 2
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 8
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 3
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 16
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 4
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 32
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 5
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 64
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 6
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 128
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 7
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 256
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 8
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 512
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 9
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 1024
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 10
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 668(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a5, 436(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a3, a0, a5
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 11
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a6, 1
+; RV32I-NEXT: and a3, a0, a6
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 12
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a7, 2
+; RV32I-NEXT: and a3, a0, a7
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 13
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t0, 4
+; RV32I-NEXT: and a3, a0, t0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 14
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t1, 8
+; RV32I-NEXT: and a3, a0, t1
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 15
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 652(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t2, 16
+; RV32I-NEXT: and a3, a0, t2
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 16
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t4, 32
+; RV32I-NEXT: and a3, a0, t4
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 17
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t5, 64
+; RV32I-NEXT: and a3, a0, t5
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 18
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t6, 128
+; RV32I-NEXT: and a3, a0, t6
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 19
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s0, 256
+; RV32I-NEXT: and a3, a0, s0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 20
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s1, 512
+; RV32I-NEXT: and a3, a0, s1
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 21
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s3, 1024
+; RV32I-NEXT: and a3, a0, s3
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 22
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s4, 2048
+; RV32I-NEXT: and a3, a0, s4
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 23
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 580(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s5, 4096
+; RV32I-NEXT: and a3, a0, s5
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 24
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s6, 8192
+; RV32I-NEXT: and a3, a0, s6
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 25
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s8, 16384
+; RV32I-NEXT: and a3, a0, s8
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 26
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a3, 32768
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 27
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a3, 65536
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 28
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a3, 131072
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, a1, 29
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: sw a3, 576(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 1
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: and a3, a3, a1
+; RV32I-NEXT: sw a3, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a1, a1, 30
+; RV32I-NEXT: lui a4, 262144
+; RV32I-NEXT: and a0, a0, a4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 2
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 8
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 3
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 16
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 4
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 32
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 5
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 64
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 6
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 128
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 7
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 256
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 8
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 512
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 9
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 1024
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 10
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, a5
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 11
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, a6
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 12
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, a7
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 13
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, t0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 14
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, t1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 15
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, t2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 16
+; RV32I-NEXT: and s10, a0, a1
+; RV32I-NEXT: and a0, t3, t4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 17
+; RV32I-NEXT: and s9, a0, a1
+; RV32I-NEXT: and a0, t3, t5
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 18
+; RV32I-NEXT: and s11, a0, a1
+; RV32I-NEXT: and a0, t3, t6
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 19
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, s0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli s0, a2, 20
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, s1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli s1, a2, 21
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, s3
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a2, 22
+; RV32I-NEXT: and s1, a0, a1
+; RV32I-NEXT: and a1, t3, s4
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a3, a2, 23
+; RV32I-NEXT: and s0, a1, a3
+; RV32I-NEXT: and a1, t3, s5
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a3, a2, 24
+; RV32I-NEXT: and s3, a1, a3
+; RV32I-NEXT: and a3, t3, s6
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a2, 25
+; RV32I-NEXT: and s4, a3, a5
+; RV32I-NEXT: and a5, t3, s8
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a7, a2, 26
+; RV32I-NEXT: and s5, a5, a7
; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
+; RV32I-NEXT: and a5, t3, a0
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli t0, a2, 27
+; RV32I-NEXT: and s8, a5, t0
; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s6, s6, a0
+; RV32I-NEXT: and a5, t3, a0
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a0, a5, -1
+; RV32I-NEXT: slli a5, a2, 28
+; RV32I-NEXT: and s6, a0, a5
; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: srli a1, s2, 8
-; RV32I-NEXT: srli a2, s2, 24
-; RV32I-NEXT: lw a6, 276(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a3, s2, a6
-; RV32I-NEXT: xor a0, s6, a0
-; RV32I-NEXT: slli a4, s2, 24
-; RV32I-NEXT: xor a0, a0, s1
-; RV32I-NEXT: srli a5, s8, 8
-; RV32I-NEXT: and a1, a1, a6
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: srli a2, s8, 24
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: and a4, s8, a6
-; RV32I-NEXT: and a5, a5, a6
-; RV32I-NEXT: or a2, a5, a2
-; RV32I-NEXT: slli a5, s8, 24
+; RV32I-NEXT: and a0, t3, a0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli t2, a2, 29
+; RV32I-NEXT: and t6, a0, t2
+; RV32I-NEXT: and t2, t3, a4
+; RV32I-NEXT: andi t3, t3, 1
+; RV32I-NEXT: seqz t3, t3
+; RV32I-NEXT: addi t3, t3, -1
+; RV32I-NEXT: and t3, t3, a2
+; RV32I-NEXT: slli a2, a2, 30
+; RV32I-NEXT: seqz t2, t2
+; RV32I-NEXT: addi t2, t2, -1
+; RV32I-NEXT: and t4, t2, a2
+; RV32I-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, a1, a0
+; RV32I-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a0, a1
+; RV32I-NEXT: lw a0, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a0, a1
+; RV32I-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a0, a1
+; RV32I-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a0, a1
+; RV32I-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a4, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a0, a4
+; RV32I-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 580(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a0, a5
+; RV32I-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a0, a1
+; RV32I-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, a0
+; RV32I-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw a2, 532(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 528(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: lw a3, 512(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw ra, 508(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, ra
+; RV32I-NEXT: xor s9, s10, s9
+; RV32I-NEXT: xor s0, s1, s0
+; RV32I-NEXT: xor t4, t6, t4
+; RV32I-NEXT: xor a7, t5, a7
+; RV32I-NEXT: lw t5, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, t5
+; RV32I-NEXT: lw t5, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t1, t5
+; RV32I-NEXT: lw t5, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, t2, t5
+; RV32I-NEXT: lw t5, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t5
+; RV32I-NEXT: lw t5, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t5
+; RV32I-NEXT: lw t5, 700(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, t5
+; RV32I-NEXT: xor a0, t3, a0
+; RV32I-NEXT: lw t3, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, t3
+; RV32I-NEXT: lw t3, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, t3
+; RV32I-NEXT: lw t3, 516(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, t3
+; RV32I-NEXT: xor t3, s9, s11
+; RV32I-NEXT: xor t5, s0, s3
+; RV32I-NEXT: lw t6, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, t6
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t1, t0
+; RV32I-NEXT: lw t1, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t2, t1
+; RV32I-NEXT: lw t2, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t2
+; RV32I-NEXT: lw t2, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t2
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: lw a2, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a3, a2
+; RV32I-NEXT: lw a3, 496(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, t3, a3
+; RV32I-NEXT: xor t2, t5, s4
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 652(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t1, t0
+; RV32I-NEXT: lw t1, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t1
+; RV32I-NEXT: lw t1, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t1
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: lw a2, 500(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a3, a2
+; RV32I-NEXT: xor a3, t2, s5
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t0
+; RV32I-NEXT: lw t0, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t0
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: xor a2, a3, s8
+; RV32I-NEXT: xor a3, a7, a4
+; RV32I-NEXT: lw a4, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a5, a4
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a1, a2, s6
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a1, a3, a6
+; RV32I-NEXT: xor a0, a0, t4
+; RV32I-NEXT: srli a2, a1, 8
+; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: slli a4, a1, 24
+; RV32I-NEXT: lw a5, 720(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: and a2, a2, a5
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: srli a3, a0, 8
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: srli a4, a0, 24
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: or a3, a3, a4
+; RV32I-NEXT: and a4, a0, a5
+; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lw a5, 128(sp) # 4-byte Folded Reload
-; RV32I-NEXT: xor a0, a5, a0
-; RV32I-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: or a2, a4, a2
-; RV32I-NEXT: srli a0, a1, 4
-; RV32I-NEXT: lw s11, 268(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a1, s11
-; RV32I-NEXT: srli a3, a2, 4
-; RV32I-NEXT: and a2, a2, s11
-; RV32I-NEXT: and a0, a0, s11
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: and a3, a3, s11
-; RV32I-NEXT: slli a2, a2, 4
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: lw s10, 272(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a0, a0, s10
-; RV32I-NEXT: srli a3, a2, 2
-; RV32I-NEXT: and a2, a2, s10
-; RV32I-NEXT: and a1, a1, s10
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, s10
-; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lw a4, 280(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: srli a2, a1, 4
+; RV32I-NEXT: lw a4, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: srli a3, a0, 4
; RV32I-NEXT: and a0, a0, a4
-; RV32I-NEXT: srli a3, a2, 1
; RV32I-NEXT: and a2, a2, a4
-; RV32I-NEXT: and a1, a1, a4
-; RV32I-NEXT: slli a0, a0, 1
; RV32I-NEXT: and a3, a3, a4
-; RV32I-NEXT: slli a2, a2, 1
-; RV32I-NEXT: or s0, a1, a0
-; RV32I-NEXT: or s5, a3, a2
-; RV32I-NEXT: andi a1, s5, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s5, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s6, a0, s1
-; RV32I-NEXT: andi a1, s5, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s5, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: andi a1, s5, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s5, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s5, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: andi a1, s5, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s5, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s5, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s5, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: and a1, s5, s9
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s6, a0
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: lw a3, 276(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: and a3, a0, a3
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s11
-; RV32I-NEXT: and a1, a1, s11
+; RV32I-NEXT: slli a1, a1, 4
; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s10
-; RV32I-NEXT: and a1, a1, s10
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srli a2, a1, 2
+; RV32I-NEXT: and a1, a1, s7
+; RV32I-NEXT: srli a3, a0, 2
+; RV32I-NEXT: and a0, a0, s7
+; RV32I-NEXT: and a2, a2, s7
+; RV32I-NEXT: and a3, a3, s7
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a2, a2, 1364
; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lw a2, 280(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srli a3, a1, 1
+; RV32I-NEXT: and a1, a1, s2
+; RV32I-NEXT: and a4, a0, s2
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: and a3, a3, a2
; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: lw a2, 124(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: or a1, a3, a1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: srli a1, a1, 1
+; RV32I-NEXT: lw a2, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: andi a1, s8, 2
-; RV32I-NEXT: sw a1, 280(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s8, 1
-; RV32I-NEXT: sw a1, 276(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, a0, s0
-; RV32I-NEXT: andi a1, s8, 4
-; RV32I-NEXT: sw a1, 272(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s8, 8
-; RV32I-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 16
-; RV32I-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s8, 32
-; RV32I-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s8, 64
-; RV32I-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 128
-; RV32I-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s8, 256
-; RV32I-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s8, 512
-; RV32I-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s8, 1024
-; RV32I-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: and a1, s8, s9
-; RV32I-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s10, s9
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui s11, 16
-; RV32I-NEXT: and a1, s8, s11
-; RV32I-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s5, s1, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s6, s0, a0
-; RV32I-NEXT: lui s0, 32768
-; RV32I-NEXT: and a1, s8, s0
-; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s6, s6, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s6, a0
-; RV32I-NEXT: xor s9, s5, a0
-; RV32I-NEXT: lui s5, 131072
-; RV32I-NEXT: and a1, s8, s5
-; RV32I-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s6, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s6, a0
-; RV32I-NEXT: lui s6, 524288
-; RV32I-NEXT: and a1, s8, s6
-; RV32I-NEXT: sw a1, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s8, s9, a0
-; RV32I-NEXT: andi a1, s7, 2
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s7, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, a0, s4
-; RV32I-NEXT: andi a1, s7, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s7, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s7, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s7, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s7, 256
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s7, 512
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s7, 1024
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: and a1, s7, s10
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: and a1, s7, s11
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: and a1, s7, s0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: and a1, s7, s5
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: and a1, s7, s6
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, a0, s8
-; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: xor s7, a1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 256(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, a0, s4
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 188(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 180(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 176(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 172(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 168(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 164(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 160(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 156(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 152(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 148(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 140(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 132(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s4, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 280(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 276(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, a0, s3
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 272(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 268(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw s4, 0(a1)
-; RV32I-NEXT: lw a2, 128(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a2, 4(a1)
-; RV32I-NEXT: sw a0, 8(a1)
-; RV32I-NEXT: sw s7, 12(a1)
-; RV32I-NEXT: lw a1, 264(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw s4, 0(a1)
-; RV32I-NEXT: sw a2, 4(a1)
-; RV32I-NEXT: sw a0, 8(a1)
-; RV32I-NEXT: sw s7, 12(a1)
-; RV32I-NEXT: lw ra, 332(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 328(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 324(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 320(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 316(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 312(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 308(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 304(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 300(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 296(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s9, 292(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s10, 288(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s11, 284(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 336
+; RV32I-NEXT: lw a2, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: lw a2, 724(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a4, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a4, 0(a2)
+; RV32I-NEXT: sw a1, 4(a2)
+; RV32I-NEXT: lw a3, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a3, 8(a2)
+; RV32I-NEXT: sw a0, 12(a2)
+; RV32I-NEXT: lw a2, 728(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a4, 0(a2)
+; RV32I-NEXT: sw a1, 4(a2)
+; RV32I-NEXT: sw a3, 8(a2)
+; RV32I-NEXT: sw a0, 12(a2)
+; RV32I-NEXT: lw ra, 780(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 776(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 772(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 768(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 764(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 760(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 756(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 752(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 748(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 744(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 740(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 736(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 732(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 784
; RV32I-NEXT: ret
;
; RV64I-LABEL: commutative_clmul_v2i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -352
-; RV64I-NEXT: sd ra, 344(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 336(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 328(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 320(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 312(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 304(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 296(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 288(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 280(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 272(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 264(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s10, 256(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s11, 248(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a5, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -1088
+; RV64I-NEXT: sd ra, 1080(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 1072(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 1064(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 1056(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 1048(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 1040(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 1032(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 1024(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 1016(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 1008(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 1000(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 992(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 984(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a5, 976(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a4, 968(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv ra, a1
+; RV64I-NEXT: mv s11, a0
+; RV64I-NEXT: slli t0, a0, 1
+; RV64I-NEXT: andi s10, a2, 2
+; RV64I-NEXT: slli a4, a0, 2
+; RV64I-NEXT: andi s5, a2, 4
+; RV64I-NEXT: slli a5, a0, 3
+; RV64I-NEXT: andi s6, a2, 8
+; RV64I-NEXT: slli a6, a0, 4
+; RV64I-NEXT: andi t5, a2, 16
+; RV64I-NEXT: slli a7, a0, 5
+; RV64I-NEXT: andi t6, a2, 32
+; RV64I-NEXT: slli a0, a0, 6
+; RV64I-NEXT: andi t3, a2, 64
+; RV64I-NEXT: slli t4, s11, 7
+; RV64I-NEXT: andi s3, a2, 128
+; RV64I-NEXT: slli t2, s11, 8
+; RV64I-NEXT: andi s2, a2, 256
+; RV64I-NEXT: slli a1, s11, 9
+; RV64I-NEXT: andi s4, a2, 512
+; RV64I-NEXT: slli t1, s11, 10
+; RV64I-NEXT: andi s0, a2, 1024
+; RV64I-NEXT: slli s1, s11, 31
+; RV64I-NEXT: sraiw s7, a2, 31
+; RV64I-NEXT: seqz s10, s10
+; RV64I-NEXT: addi s10, s10, -1
+; RV64I-NEXT: and t0, s10, t0
+; RV64I-NEXT: slli s10, s11, 63
+; RV64I-NEXT: seqz s5, s5
+; RV64I-NEXT: addi s5, s5, -1
+; RV64I-NEXT: and s5, s5, a4
+; RV64I-NEXT: srli s8, a2, 63
+; RV64I-NEXT: seqz a4, s6
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: slli s6, ra, 1
+; RV64I-NEXT: seqz a5, t5
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: and a5, a5, a6
+; RV64I-NEXT: andi t5, a3, 2
+; RV64I-NEXT: seqz a6, t6
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: slli t6, ra, 2
+; RV64I-NEXT: seqz a7, t3
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: and a7, a7, a0
+; RV64I-NEXT: sd a7, 904(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t3, a3, 4
+; RV64I-NEXT: seqz a7, s3
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: and a0, a7, t4
+; RV64I-NEXT: slli t4, ra, 3
+; RV64I-NEXT: seqz a7, s2
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: and a7, a7, t2
+; RV64I-NEXT: andi t2, a3, 8
+; RV64I-NEXT: seqz s2, s4
+; RV64I-NEXT: addi s2, s2, -1
+; RV64I-NEXT: and a1, s2, a1
+; RV64I-NEXT: sd a1, 896(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a1, ra, 4
+; RV64I-NEXT: seqz s0, s0
+; RV64I-NEXT: addi s0, s0, -1
+; RV64I-NEXT: and t1, s0, t1
+; RV64I-NEXT: sd t1, 944(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t1, a3, 16
+; RV64I-NEXT: seqz s0, s7
+; RV64I-NEXT: addi s0, s0, -1
+; RV64I-NEXT: and s0, s0, s1
+; RV64I-NEXT: sd s0, 952(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s0, ra, 5
+; RV64I-NEXT: seqz s1, s8
+; RV64I-NEXT: addi s1, s1, -1
+; RV64I-NEXT: and s1, s1, s10
+; RV64I-NEXT: sd s1, 960(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi s1, a3, 32
+; RV64I-NEXT: seqz t5, t5
+; RV64I-NEXT: addi t5, t5, -1
+; RV64I-NEXT: and t5, t5, s6
+; RV64I-NEXT: sd t5, 880(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t5, ra, 6
+; RV64I-NEXT: seqz t3, t3
+; RV64I-NEXT: addi t3, t3, -1
+; RV64I-NEXT: and t3, t3, t6
+; RV64I-NEXT: sd t3, 872(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t3, a3, 64
+; RV64I-NEXT: seqz t2, t2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and t2, t2, t4
+; RV64I-NEXT: sd t2, 864(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t2, ra, 7
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: and a1, t1, a1
+; RV64I-NEXT: sd a1, 856(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t1, a3, 128
+; RV64I-NEXT: seqz t4, s1
+; RV64I-NEXT: addi t4, t4, -1
+; RV64I-NEXT: and a1, t4, s0
+; RV64I-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t4, ra, 8
+; RV64I-NEXT: seqz t3, t3
+; RV64I-NEXT: addi t3, t3, -1
+; RV64I-NEXT: and a1, t3, t5
+; RV64I-NEXT: sd a1, 920(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t3, a3, 256
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: and a1, t1, t2
+; RV64I-NEXT: sd a1, 832(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t1, ra, 9
+; RV64I-NEXT: seqz t2, t3
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and a1, t2, t4
+; RV64I-NEXT: sd a1, 808(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t2, a3, 512
+; RV64I-NEXT: seqz t2, t2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and a1, t2, t1
+; RV64I-NEXT: sd a1, 912(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t1, a3, 1024
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: slli t2, ra, 10
+; RV64I-NEXT: and a1, t1, t2
+; RV64I-NEXT: sd a1, 936(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sraiw t1, a3, 31
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: slli t2, ra, 31
+; RV64I-NEXT: and a1, t1, t2
+; RV64I-NEXT: sd a1, 928(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srli t1, a3, 63
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: slli t2, ra, 63
+; RV64I-NEXT: and a1, t1, t2
+; RV64I-NEXT: sd a1, 888(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t1, a2, 1
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: and t1, t1, s11
+; RV64I-NEXT: xor a1, t1, t0
+; RV64I-NEXT: sd a1, 688(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a1, s5, a4
+; RV64I-NEXT: sd a1, 680(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a1, a5, a6
+; RV64I-NEXT: sd a1, 672(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a1, a0, a7
+; RV64I-NEXT: sd a1, 664(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 12
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 816(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 13
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 840(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 14
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 8
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 15
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 16
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 784(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 17
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 800(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 64
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 18
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 824(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 128
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 19
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 256
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 20
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 504(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 512
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 21
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 760(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1024
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 22
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 768(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2048
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 23
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 776(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4096
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 24
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 792(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui s10, 8192
+; RV64I-NEXT: and a4, a2, s10
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 25
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16384
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 26
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32768
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 27
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 728(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 65536
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 28
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 736(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 131072
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 29
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 744(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 262144
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 30
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 752(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a4, 1
+; RV64I-NEXT: slli a1, a4, 11
+; RV64I-NEXT: sd a1, 600(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a5, a4, 32
+; RV64I-NEXT: slli a6, a4, 33
+; RV64I-NEXT: sd a6, 584(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, a4, 34
+; RV64I-NEXT: slli a7, a4, 35
+; RV64I-NEXT: sd a7, 640(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a7, a4, 36
+; RV64I-NEXT: sd a7, 576(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t0, a4, 37
+; RV64I-NEXT: sd t0, 624(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t1, a4, 38
+; RV64I-NEXT: sd t1, 568(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t0, a4, 39
+; RV64I-NEXT: slli t2, a4, 40
+; RV64I-NEXT: sd t2, 560(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t3, a4, 41
+; RV64I-NEXT: sd t3, 648(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s0, a4, 42
+; RV64I-NEXT: slli s1, a4, 43
+; RV64I-NEXT: slli t3, a4, 44
+; RV64I-NEXT: sd t3, 616(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t3, a4, 45
+; RV64I-NEXT: sd t3, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t4, a4, 46
+; RV64I-NEXT: sd t4, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t4, a4, 47
+; RV64I-NEXT: sd t4, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t5, a4, 48
+; RV64I-NEXT: sd t5, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t5, a4, 49
+; RV64I-NEXT: slli t6, a4, 50
+; RV64I-NEXT: sd t6, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t6, a4, 51
+; RV64I-NEXT: sd t6, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s2, a4, 52
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t6, a4, 53
+; RV64I-NEXT: slli s3, a4, 54
+; RV64I-NEXT: sd s3, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s3, a4, 55
+; RV64I-NEXT: slli s4, a4, 56
+; RV64I-NEXT: slli s5, a4, 57
+; RV64I-NEXT: slli s6, a4, 58
+; RV64I-NEXT: slli s7, a4, 59
+; RV64I-NEXT: slli s8, a4, 60
+; RV64I-NEXT: slli s9, a4, 61
+; RV64I-NEXT: slli a4, a4, 62
+; RV64I-NEXT: sd a4, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 8
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 64
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 128
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 256
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 512
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1024
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 720(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2048
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4096
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 472(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a3, s10
+; RV64I-NEXT: sd a4, 480(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16384
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 496(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32768
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 592(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 65536
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 656(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 131072
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 704(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 262144
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 712(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, a1
+; RV64I-NEXT: sd a4, 456(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, a5
+; RV64I-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, a6
+; RV64I-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, a0
+; RV64I-NEXT: sd a4, 160(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a6, 640(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, a6
+; RV64I-NEXT: sd a4, 168(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, a7
+; RV64I-NEXT: sd a4, 176(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a7, 624(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, a7
+; RV64I-NEXT: sd a4, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t1
+; RV64I-NEXT: sd a4, 192(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t0
+; RV64I-NEXT: sd a4, 296(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t2
+; RV64I-NEXT: sd a4, 280(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 648(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, a1
+; RV64I-NEXT: sd a4, 344(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s0
+; RV64I-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s1
+; RV64I-NEXT: sd a4, 200(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t2, 616(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, t2
+; RV64I-NEXT: sd a4, 208(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t3
+; RV64I-NEXT: sd a4, 216(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t3, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, t3
+; RV64I-NEXT: sd a4, 224(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t4
; RV64I-NEXT: sd a4, 232(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s3, a3
-; RV64I-NEXT: mv s5, a2
-; RV64I-NEXT: mv s2, a1
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, a2, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: andi a1, s5, 1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, a0, s6
-; RV64I-NEXT: andi a1, s5, 4
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: andi a1, s5, 8
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s6, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: andi a1, s5, 16
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: andi a1, s5, 32
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: andi a1, s5, 64
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: andi a1, s5, 128
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: andi a1, s5, 256
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: andi a1, s5, 512
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: andi a1, s5, 1024
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: li s9, 1
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 11
+; RV64I-NEXT: ld t4, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, t4
+; RV64I-NEXT: sd a4, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t5
+; RV64I-NEXT: mv s10, t5
+; RV64I-NEXT: sd a4, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t1, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, t1
+; RV64I-NEXT: sd a4, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t5, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, t5
+; RV64I-NEXT: sd a4, 320(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s2
+; RV64I-NEXT: sd a4, 352(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t6
+; RV64I-NEXT: sd a4, 376(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, s2
+; RV64I-NEXT: sd a4, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s3
+; RV64I-NEXT: sd a4, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s4
+; RV64I-NEXT: sd a4, 272(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s5
+; RV64I-NEXT: sd a4, 288(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s6
+; RV64I-NEXT: sd a4, 304(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s7
+; RV64I-NEXT: sd a4, 312(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s8
+; RV64I-NEXT: sd a4, 328(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s9
+; RV64I-NEXT: sd a4, 336(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: sd a2, 360(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a2, 600(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 368(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a5, a3, a5
+; RV64I-NEXT: sd a5, 400(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a2, 584(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 424(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a3, a0
+; RV64I-NEXT: sd a2, 464(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a3, a6
+; RV64I-NEXT: sd a2, 488(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a2, 576(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 512(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a3, a7
+; RV64I-NEXT: sd a2, 520(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a2, 568(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 536(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a3, t0
+; RV64I-NEXT: sd a2, 544(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a2, 560(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 552(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: sd a1, 560(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and s0, a3, s0
+; RV64I-NEXT: sd s0, 568(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and s1, a3, s1
+; RV64I-NEXT: sd s1, 576(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a1, a3, t2
+; RV64I-NEXT: sd a1, 584(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: sd a1, 600(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a1, a3, t3
+; RV64I-NEXT: sd a1, 616(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: sd a1, 624(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a1, a3, t4
+; RV64I-NEXT: sd a1, 640(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a1, a3, s10
+; RV64I-NEXT: sd a1, 648(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and t0, a3, t1
+; RV64I-NEXT: and a0, a3, t5
+; RV64I-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a0, a3, a0
+; RV64I-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a3, t6
+; RV64I-NEXT: sd a0, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and t6, a3, s2
+; RV64I-NEXT: and s0, a3, s3
+; RV64I-NEXT: and s2, a3, s4
+; RV64I-NEXT: and t5, a3, s5
+; RV64I-NEXT: and t4, a3, s6
+; RV64I-NEXT: and t3, a3, s7
+; RV64I-NEXT: and a0, a3, s8
+; RV64I-NEXT: sd a0, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and t2, a3, s9
+; RV64I-NEXT: and a7, a3, a4
+; RV64I-NEXT: andi a3, a3, 1
+; RV64I-NEXT: seqz a2, a3
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a2, a2, ra
+; RV64I-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, a2, a1
+; RV64I-NEXT: ld a1, 872(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 864(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s3, a1, a2
+; RV64I-NEXT: ld a1, 856(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 848(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s4, a1, a2
+; RV64I-NEXT: ld a1, 832(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 808(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s10, a1, a2
+; RV64I-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 12
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 856(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 13
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 880(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 14
+; RV64I-NEXT: and s5, a2, a3
+; RV64I-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 15
+; RV64I-NEXT: and s6, a2, a3
+; RV64I-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 16
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 808(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 17
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 848(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 18
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 872(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 19
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a3, a1
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: slli a4, ra, 20
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: ld a1, 696(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 21
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 720(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 22
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 720(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 23
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 832(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 24
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 864(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 480(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 25
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: ld a1, 496(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, ra, 26
+; RV64I-NEXT: and t1, a5, a6
+; RV64I-NEXT: ld a1, 592(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, ra, 27
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 592(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 656(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, ra, 28
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 656(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 704(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, ra, 29
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 704(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 712(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, ra, 30
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 712(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 688(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 680(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a5
+; RV64I-NEXT: sd a1, 496(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 904(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 672(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a5, a1
+; RV64I-NEXT: sd a1, 480(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 896(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 664(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a5, a1
+; RV64I-NEXT: sd a1, 472(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 11
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 456(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 632(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 608(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a5
+; RV64I-NEXT: sd a1, 448(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 528(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 504(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a5
+; RV64I-NEXT: sd a1, 504(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 432(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a5
+; RV64I-NEXT: sd a1, 440(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 32
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 33
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 34
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 608(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 35
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 672(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 36
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 688(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 37
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 896(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 38
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 904(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 39
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 40
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 41
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 42
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 43
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 528(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 44
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 632(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 45
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 664(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 46
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 680(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 47
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 48
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 49
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 50
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 51
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 52
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 53
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 54
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 55
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 432(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 56
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 57
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 58
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 59
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 60
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 61
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, s11, 62
+; RV64I-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: and a0, a5, a0
+; RV64I-NEXT: sd a0, 360(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a6, s1, s3
+; RV64I-NEXT: ld a0, 920(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, s4, a0
+; RV64I-NEXT: sd a0, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 912(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, s10, a0
; RV64I-NEXT: sd a0, 224(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: lui a0, 2
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: lui a0, 8
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: lui a0, 16
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 32
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 64
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: lui a1, 128
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: lui a0, 256
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: lui a0, 512
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 1024
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 2048
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 4096
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: lui a1, 8192
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: lui a0, 16384
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: lui a0, 32768
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 65536
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 131072
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 262144
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 31
+; RV64I-NEXT: ld a0, 368(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a5, ra, 11
+; RV64I-NEXT: and a0, a0, a5
+; RV64I-NEXT: sd a0, 368(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a0, s5, s6
; RV64I-NEXT: sd a0, 216(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: slli a1, s9, 32
-; RV64I-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: slli a0, s9, 33
+; RV64I-NEXT: xor a2, a2, a3
+; RV64I-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a0, a4, t1
; RV64I-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: slli a0, s9, 34
-; RV64I-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 35
-; RV64I-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 36
+; RV64I-NEXT: ld a0, 400(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 32
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 400(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 424(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 33
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 424(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 464(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 34
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 464(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 488(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 35
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 488(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 512(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 36
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 512(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 520(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 37
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 912(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 536(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 38
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 920(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 544(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 39
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 536(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 552(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 40
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 520(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 560(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 41
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 560(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 568(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli s1, ra, 42
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: sd a0, 568(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 576(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 43
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 576(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 584(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 44
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 584(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 600(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 45
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 600(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 616(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 46
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 616(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 624(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a4, ra, 47
+; RV64I-NEXT: and s7, a0, a4
+; RV64I-NEXT: ld a0, 640(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 48
+; RV64I-NEXT: and s6, a0, a2
+; RV64I-NEXT: ld a0, 648(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 49
+; RV64I-NEXT: and a0, a0, a3
; RV64I-NEXT: sd a0, 176(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 37
-; RV64I-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 38
-; RV64I-NEXT: sd a0, 160(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: slli a1, s9, 39
-; RV64I-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: slli a0, s9, 40
-; RV64I-NEXT: sd a0, 144(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: slli a0, s9, 41
-; RV64I-NEXT: sd a0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 42
-; RV64I-NEXT: sd a0, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 43
-; RV64I-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 44
-; RV64I-NEXT: sd a0, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 45
-; RV64I-NEXT: sd a0, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 46
-; RV64I-NEXT: sd a0, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: slli a1, s9, 47
-; RV64I-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s1, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: slli a0, s9, 48
-; RV64I-NEXT: sd a0, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s6, a0
-; RV64I-NEXT: slli a0, s9, 49
-; RV64I-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 50
-; RV64I-NEXT: sd a0, 64(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 51
-; RV64I-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 52
-; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 53
-; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 54
-; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 55
-; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s0, a0
-; RV64I-NEXT: slli a1, s9, 56
-; RV64I-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s0, s1, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: slli a0, s9, 57
-; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s7, s6, a0
-; RV64I-NEXT: slli s6, s9, 58
-; RV64I-NEXT: and a1, s5, s6
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s8, s7, a0
-; RV64I-NEXT: slli s7, s9, 59
-; RV64I-NEXT: and a1, s5, s7
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s8, s8, a0
-; RV64I-NEXT: slli s11, s9, 60
-; RV64I-NEXT: and a1, s5, s11
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s10, s8, a0
-; RV64I-NEXT: slli s8, s9, 61
-; RV64I-NEXT: and a1, s5, s8
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s10, s10, a0
-; RV64I-NEXT: slli s9, s9, 62
-; RV64I-NEXT: and a1, s5, s9
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s10, a0
-; RV64I-NEXT: li s10, -1
-; RV64I-NEXT: slli s10, s10, 63
-; RV64I-NEXT: and a1, s5, s10
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s5, s0, a0
-; RV64I-NEXT: andi a1, s3, 2
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, a0, s4
-; RV64I-NEXT: andi a1, s3, 4
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: andi a1, s3, 16
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 32
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s4, a0
-; RV64I-NEXT: andi a1, s3, 64
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: andi a1, s3, 128
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 256
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s4, a0
-; RV64I-NEXT: andi a1, s3, 512
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: andi a1, s3, 1024
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: ld a0, 224(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s4, a0
-; RV64I-NEXT: lui a0, 2
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: lui a0, 4
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: lui a0, 8
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s4, a0
-; RV64I-NEXT: lui a0, 16
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 32
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 64
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: lui a0, 128
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: lui a0, 256
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s4, a0
-; RV64I-NEXT: lui a0, 512
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 1024
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 2048
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 4096
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: lui a0, 8192
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: lui a0, 16384
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s4, a0
-; RV64I-NEXT: lui a0, 32768
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 65536
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 131072
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 262144
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 216(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: ld a0, 208(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: ld a0, 200(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s4, a0
-; RV64I-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 184(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 176(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 160(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: ld a0, 144(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s4, a0
-; RV64I-NEXT: ld a0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: ld a0, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: ld a0, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s4, a0
+; RV64I-NEXT: seqz a0, t0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 50
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
; RV64I-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 51
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: sd a0, 544(sp) # 8-byte Folded Spill
; RV64I-NEXT: ld a0, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s4, a0
-; RV64I-NEXT: and a1, s3, s6
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: and a1, s3, s7
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: and a1, s3, s11
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: and a1, s3, s8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: and a1, s3, s9
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: and a1, s3, s10
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s0, a0
-; RV64I-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sd s5, 0(a1)
-; RV64I-NEXT: sd a0, 8(a1)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 52
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: sd a0, 552(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 53
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: sd a0, 624(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a0, t6
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 54
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: sd a0, 640(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a0, s0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli s0, ra, 55
+; RV64I-NEXT: and a0, a0, s0
+; RV64I-NEXT: sd a0, 648(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a0, s2
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 56
+; RV64I-NEXT: and s4, a0, a3
+; RV64I-NEXT: seqz a2, t5
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 57
+; RV64I-NEXT: and s3, a2, a3
+; RV64I-NEXT: seqz a3, t4
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: slli a5, ra, 58
+; RV64I-NEXT: and s5, a3, a5
+; RV64I-NEXT: seqz a4, t3
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 59
+; RV64I-NEXT: and s8, a4, a5
+; RV64I-NEXT: ld a0, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a0
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli t0, ra, 60
+; RV64I-NEXT: and s10, a5, t0
+; RV64I-NEXT: seqz t0, t2
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: slli a1, ra, 61
+; RV64I-NEXT: and s9, t0, a1
+; RV64I-NEXT: slli t6, ra, 62
+; RV64I-NEXT: seqz a1, a7
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a0, a1, t6
+; RV64I-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 496(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 480(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s2, a0, a1
+; RV64I-NEXT: ld a0, 944(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, a1, a0
+; RV64I-NEXT: ld a0, 816(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, a1, a0
+; RV64I-NEXT: ld a0, 784(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, a1, a0
+; RV64I-NEXT: ld a0, 760(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 504(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, a1, a0
+; RV64I-NEXT: ld a0, 728(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, a1, a0
+; RV64I-NEXT: ld a0, 416(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 384(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, a0, s0
+; RV64I-NEXT: ld a0, 296(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 280(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, a0, s1
+; RV64I-NEXT: ld a0, 232(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sd s5, 0(a1)
-; RV64I-NEXT: sd a0, 8(a1)
-; RV64I-NEXT: ld ra, 344(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 336(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 328(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 320(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 312(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 304(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 296(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 288(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 280(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 272(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 264(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s10, 256(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s11, 248(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 352
+; RV64I-NEXT: xor a7, a0, a1
+; RV64I-NEXT: ld a0, 272(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, a0, a1
+; RV64I-NEXT: ld a0, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, a6, a0
+; RV64I-NEXT: ld a0, 936(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: ld a1, 856(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 368(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 808(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a3, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 696(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a4, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 592(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 400(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a6, 424(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a5, a6
+; RV64I-NEXT: ld a6, 536(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld ra, 520(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a6, ra
+; RV64I-NEXT: xor s7, s7, s6
+; RV64I-NEXT: xor s3, s4, s3
+; RV64I-NEXT: xor t2, s2, t2
+; RV64I-NEXT: ld s2, 840(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t3, s2
+; RV64I-NEXT: ld s2, 800(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t4, s2
+; RV64I-NEXT: ld s2, 768(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t5, s2
+; RV64I-NEXT: ld s2, 736(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, t6, s2
+; RV64I-NEXT: ld s2, 608(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, s0, s2
+; RV64I-NEXT: ld s2, 344(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, s1, s2
+; RV64I-NEXT: ld s2, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, s2
+; RV64I-NEXT: ld s2, 304(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, s2
+; RV64I-NEXT: xor a0, t1, a0
+; RV64I-NEXT: ld t1, 880(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, t1
+; RV64I-NEXT: ld t1, 848(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a2, t1
+; RV64I-NEXT: ld t1, 720(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a3, t1
+; RV64I-NEXT: ld t1, 656(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a4, t1
+; RV64I-NEXT: ld t1, 464(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a5, t1
+; RV64I-NEXT: ld t1, 560(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a6, t1
+; RV64I-NEXT: ld t1, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, s7, t1
+; RV64I-NEXT: xor s2, s3, s5
+; RV64I-NEXT: xor t2, t2, t3
+; RV64I-NEXT: ld t3, 824(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t4, t3
+; RV64I-NEXT: ld t4, 776(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t5, t4
+; RV64I-NEXT: ld t5, 744(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t6, t5
+; RV64I-NEXT: ld t6, 672(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, s0, t6
+; RV64I-NEXT: ld s0, 392(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, s1, s0
+; RV64I-NEXT: ld s1, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, s1
+; RV64I-NEXT: ld s1, 312(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, s1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 872(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 832(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 704(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 488(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 568(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: ld a6, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, t1, a6
+; RV64I-NEXT: xor t1, s2, s8
+; RV64I-NEXT: xor t2, t2, t3
+; RV64I-NEXT: ld t3, 792(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t4, t3
+; RV64I-NEXT: ld t4, 752(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t5, t4
+; RV64I-NEXT: ld t5, 688(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t6, t5
+; RV64I-NEXT: ld t6, 528(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, s0, t6
+; RV64I-NEXT: ld s0, 320(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, s0
+; RV64I-NEXT: ld s0, 328(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, s0
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 864(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 712(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 512(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 576(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 544(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: xor a6, t1, s10
+; RV64I-NEXT: xor t1, t2, t3
+; RV64I-NEXT: ld t2, 952(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t4, t2
+; RV64I-NEXT: ld t3, 896(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t5, t3
+; RV64I-NEXT: ld t4, 632(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t6, t4
+; RV64I-NEXT: ld t5, 352(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, t5
+; RV64I-NEXT: ld t5, 336(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, t5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 928(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 912(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 584(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 552(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: xor a5, a6, s9
+; RV64I-NEXT: xor a6, t1, t2
+; RV64I-NEXT: ld t1, 904(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t3, t1
+; RV64I-NEXT: ld t2, 664(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t4, t2
+; RV64I-NEXT: ld t3, 376(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, t3
+; RV64I-NEXT: ld t3, 360(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, t3
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 920(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 600(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 624(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: xor a5, a6, t1
+; RV64I-NEXT: ld a6, 680(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, t2, a6
+; RV64I-NEXT: ld t1, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, t1
+; RV64I-NEXT: ld t1, 960(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, t1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 616(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 640(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 888(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: xor a4, a5, a6
+; RV64I-NEXT: ld a5, 432(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a7, a5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 648(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: xor a4, a4, a5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: xor a1, a4, t0
+; RV64I-NEXT: xor a0, a0, a3
+; RV64I-NEXT: ld a2, 968(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sd a1, 0(a2)
+; RV64I-NEXT: sd a0, 8(a2)
+; RV64I-NEXT: ld a2, 976(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sd a1, 0(a2)
+; RV64I-NEXT: sd a0, 8(a2)
+; RV64I-NEXT: ld ra, 1080(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 1072(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 1064(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 1056(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 1048(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 1040(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 1032(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 1024(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 1016(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 1008(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 1000(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 992(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 984(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 1088
; RV64I-NEXT: ret
;
; RV32IM-LABEL: commutative_clmul_v2i64:
@@ -13246,2217 +15145,3464 @@ define void @commutative_clmul_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %p
define void @mul_use_commutative_clmul_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %p1) nounwind {
; RV32I-LABEL: mul_use_commutative_clmul_v2i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -368
-; RV32I-NEXT: sw ra, 364(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 360(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 356(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 352(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 348(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 344(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 340(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 336(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 332(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 328(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s9, 324(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s10, 320(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s11, 316(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a3, 268(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a2, 264(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s8, 0(a1)
-; RV32I-NEXT: lw s11, 4(a1)
-; RV32I-NEXT: lw s7, 8(a1)
-; RV32I-NEXT: lw s6, 12(a1)
-; RV32I-NEXT: lw s3, 0(a0)
-; RV32I-NEXT: lw s0, 4(a0)
-; RV32I-NEXT: lw s2, 8(a0)
+; RV32I-NEXT: addi sp, sp, -816
+; RV32I-NEXT: sw ra, 812(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 808(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 804(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 800(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 796(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 792(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 788(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 784(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 780(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s8, 776(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s9, 772(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s10, 768(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s11, 764(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a3, 732(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a2, 728(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t2, 0(a1)
+; RV32I-NEXT: lw a7, 4(a1)
+; RV32I-NEXT: lw a6, 8(a1)
+; RV32I-NEXT: lw s5, 12(a1)
+; RV32I-NEXT: lw t0, 0(a0)
+; RV32I-NEXT: lw t1, 4(a0)
+; RV32I-NEXT: lw t3, 8(a0)
; RV32I-NEXT: lw s4, 12(a0)
-; RV32I-NEXT: lui a2, 16
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: lui t0, 349525
-; RV32I-NEXT: addi a7, a2, -256
-; RV32I-NEXT: sw a7, 280(sp) # 4-byte Folded Spill
-; RV32I-NEXT: addi a6, a0, -241
-; RV32I-NEXT: addi a5, a1, 819
-; RV32I-NEXT: srli a0, s3, 8
-; RV32I-NEXT: srli a1, s3, 24
-; RV32I-NEXT: and a2, s3, a7
-; RV32I-NEXT: slli a3, s3, 24
-; RV32I-NEXT: srli a4, s8, 8
-; RV32I-NEXT: and a0, a0, a7
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, s8, 24
-; RV32I-NEXT: slli a2, a2, 8
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: and a3, s8, a7
-; RV32I-NEXT: and a4, a4, a7
-; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: slli a4, s8, 24
+; RV32I-NEXT: lui s9, 16
+; RV32I-NEXT: addi s9, s9, -256
+; RV32I-NEXT: srli t4, t0, 8
+; RV32I-NEXT: srli s0, t0, 24
+; RV32I-NEXT: and a3, t0, s9
+; RV32I-NEXT: slli a1, t0, 24
+; RV32I-NEXT: srli s2, t2, 8
+; RV32I-NEXT: srli s3, t2, 24
+; RV32I-NEXT: and s1, t2, s9
+; RV32I-NEXT: slli t6, t2, 24
+; RV32I-NEXT: slli a5, t1, 1
+; RV32I-NEXT: andi s6, t2, 2
+; RV32I-NEXT: slli t5, t1, 2
+; RV32I-NEXT: andi a0, t2, 4
+; RV32I-NEXT: slli s7, t1, 3
+; RV32I-NEXT: andi a4, t2, 8
+; RV32I-NEXT: slli a2, t0, 1
+; RV32I-NEXT: srli s8, t3, 8
+; RV32I-NEXT: and t4, t4, s9
+; RV32I-NEXT: or t4, t4, s0
+; RV32I-NEXT: srli s0, t3, 24
+; RV32I-NEXT: and s2, s2, s9
+; RV32I-NEXT: or s2, s2, s3
+; RV32I-NEXT: srli s3, a6, 8
+; RV32I-NEXT: slli s1, s1, 8
+; RV32I-NEXT: or t6, t6, s1
+; RV32I-NEXT: srli s1, a6, 24
+; RV32I-NEXT: and s8, s8, s9
+; RV32I-NEXT: or s0, s8, s0
+; RV32I-NEXT: and s8, a6, s9
+; RV32I-NEXT: and s3, s3, s9
+; RV32I-NEXT: sw s9, 724(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or s1, s3, s1
+; RV32I-NEXT: slli s3, a6, 24
+; RV32I-NEXT: slli s8, s8, 8
+; RV32I-NEXT: or s3, s3, s8
+; RV32I-NEXT: andi s8, a7, 2
; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: addi a4, t0, 1365
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 4
-; RV32I-NEXT: sw a6, 272(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a0, a0, a6
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, a6
-; RV32I-NEXT: and a2, a2, a6
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: and a3, a3, a6
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 2
-; RV32I-NEXT: sw a5, 276(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a0, a0, a5
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, a5
-; RV32I-NEXT: and a2, a2, a5
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, a5
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 1
-; RV32I-NEXT: sw a4, 284(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a0, a0, a4
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, a4
-; RV32I-NEXT: and a2, a2, a4
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, a4
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: or s1, a2, a0
-; RV32I-NEXT: or s9, a3, a1
-; RV32I-NEXT: andi a1, s9, 2
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: andi a1, s9, 1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s10, a0, s5
-; RV32I-NEXT: andi a1, s9, 4
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: andi a1, s9, 8
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: andi a1, s9, 16
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: andi a1, s9, 32
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s9, 64
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: andi a1, s9, 128
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: andi a1, s9, 256
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s9, 512
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s9, 1024
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a1, s9, a1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: lui a1, 1024
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: and a1, s9, a1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: lui a1, 131072
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: and a1, s9, a1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s5, a0
-; RV32I-NEXT: lui a4, 349525
-; RV32I-NEXT: addi a4, a4, 1364
-; RV32I-NEXT: sw a4, 128(sp) # 4-byte Folded Spill
-; RV32I-NEXT: xor a0, s10, a0
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: lw a3, 280(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: slli a2, a0, 24
+; RV32I-NEXT: or a3, a1, a3
+; RV32I-NEXT: or a3, a3, t4
+; RV32I-NEXT: sw a3, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a3, t3, s9
+; RV32I-NEXT: or t4, t6, s2
+; RV32I-NEXT: sw t4, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s9, t3, 24
+; RV32I-NEXT: slli a3, a3, 8
+; RV32I-NEXT: or a3, s9, a3
+; RV32I-NEXT: or a3, a3, s0
+; RV32I-NEXT: sw a3, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 2
+; RV32I-NEXT: or t4, s3, s1
+; RV32I-NEXT: sw t4, 720(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi t4, a7, 4
+; RV32I-NEXT: seqz s0, s6
+; RV32I-NEXT: seqz t6, s8
+; RV32I-NEXT: addi s0, s0, -1
+; RV32I-NEXT: addi t6, t6, -1
+; RV32I-NEXT: and s2, s0, a5
+; RV32I-NEXT: and t6, t6, a2
+; RV32I-NEXT: and a2, s0, a2
+; RV32I-NEXT: sw a2, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 3
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: seqz t4, t4
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi t4, t4, -1
+; RV32I-NEXT: and s3, a0, t5
+; RV32I-NEXT: and s0, t4, a3
+; RV32I-NEXT: and t4, a0, a3
+; RV32I-NEXT: andi a0, a7, 8
+; RV32I-NEXT: seqz a3, a4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and s10, a3, s7
+; RV32I-NEXT: and s6, a0, a2
+; RV32I-NEXT: and a2, a3, a2
+; RV32I-NEXT: sw a2, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t2, 16
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, a7, 16
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 4
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 4
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and s7, a0, a3
+; RV32I-NEXT: andi a0, t2, 32
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, a7, 32
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 5
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 5
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 640(sp) # 4-byte Folded Spill
; RV32I-NEXT: and a0, a0, a3
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: lw a2, 272(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t2, 64
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, a7, 64
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 6
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 6
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t2, 128
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, a7, 128
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 7
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 7
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t2, 256
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, a7, 256
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 8
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 8
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t2, 512
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, a7, 512
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 9
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 9
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t2, 1024
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, a7, 1024
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 10
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 10
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: slli a4, a0, 11
+; RV32I-NEXT: and a0, t2, a4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a4
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 11
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 11
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 1
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 12
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 12
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 528(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 2
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 13
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 13
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 4
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 14
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 14
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 8
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 15
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 652(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 15
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 16
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 456(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 16
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 460(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 32
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 17
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 444(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 17
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 452(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 64
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 18
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 484(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 18
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 496(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 128
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 19
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 532(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 19
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 256
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 20
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 560(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 20
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 564(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 512
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 21
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 576(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 21
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 580(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 1024
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 22
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 408(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 22
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 412(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 2048
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 23
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 400(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a3, t0, 23
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 404(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 4096
+; RV32I-NEXT: and a0, t2, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 24
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: sw a3, 420(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a2, a2, a1
+; RV32I-NEXT: sw a2, 428(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 8192
+; RV32I-NEXT: and a0, t2, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, a7, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 25
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 440(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 25
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 448(sp) # 4-byte Folded Spill
; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 16384
+; RV32I-NEXT: and a0, t2, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, a7, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 26
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 468(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 26
; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: lw a2, 276(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a1, 472(sp) # 4-byte Folded Spill
; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 32768
+; RV32I-NEXT: and a0, t2, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, a7, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 27
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 480(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 27
; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lw a2, 284(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a1, 488(sp) # 4-byte Folded Spill
; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: and a1, a1, a4
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
-; RV32I-NEXT: andi a1, s8, 2
-; RV32I-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 1
-; RV32I-NEXT: sw a1, 256(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, a0, s1
-; RV32I-NEXT: andi a1, s8, 4
-; RV32I-NEXT: sw a1, 252(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 8
-; RV32I-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: andi a1, s8, 16
-; RV32I-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 32
-; RV32I-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 64
-; RV32I-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: andi a1, s8, 128
-; RV32I-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 256
-; RV32I-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 512
-; RV32I-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 1024
-; RV32I-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: lw s5, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, s8, s5
-; RV32I-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 204(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: lui s10, 16
-; RV32I-NEXT: and a1, s8, s10
-; RV32I-NEXT: sw a1, 196(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 188(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 180(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 176(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 172(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 168(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 164(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 160(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 156(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 152(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 148(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s1, s9, a0
-; RV32I-NEXT: andi a1, s11, 2
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s11, 1
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s8, a0, s0
-; RV32I-NEXT: andi a1, s11, 4
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s11, 8
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: andi a1, s11, 16
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s11, 32
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s11, 64
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: andi a1, s11, 128
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s11, 256
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s11, 512
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s11, 1024
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: and a1, s11, s5
-; RV32I-NEXT: mv s9, s5
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: and a1, s11, s10
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: srli a1, s2, 8
-; RV32I-NEXT: srli a2, s2, 24
-; RV32I-NEXT: lw a6, 280(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a3, s2, a6
-; RV32I-NEXT: xor a0, s8, a0
-; RV32I-NEXT: slli a4, s2, 24
-; RV32I-NEXT: xor a0, a0, s1
-; RV32I-NEXT: srli a5, s7, 8
-; RV32I-NEXT: and a1, a1, a6
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: srli a2, s7, 24
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: and a4, s7, a6
-; RV32I-NEXT: and a5, a5, a6
-; RV32I-NEXT: or a2, a5, a2
-; RV32I-NEXT: slli a5, s7, 24
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lw a5, 132(sp) # 4-byte Folded Reload
-; RV32I-NEXT: xor a0, a5, a0
-; RV32I-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: or a2, a4, a2
-; RV32I-NEXT: srli a0, a1, 4
-; RV32I-NEXT: lw s11, 272(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a1, s11
-; RV32I-NEXT: srli a3, a2, 4
-; RV32I-NEXT: and a2, a2, s11
-; RV32I-NEXT: and a0, a0, s11
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: and a3, a3, s11
-; RV32I-NEXT: slli a2, a2, 4
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: lw s10, 276(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a0, a0, s10
-; RV32I-NEXT: srli a3, a2, 2
-; RV32I-NEXT: and a2, a2, s10
-; RV32I-NEXT: and a1, a1, s10
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, s10
-; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lw a4, 284(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a0, a0, a4
-; RV32I-NEXT: srli a3, a2, 1
-; RV32I-NEXT: and a2, a2, a4
-; RV32I-NEXT: and a1, a1, a4
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, a4
-; RV32I-NEXT: slli a2, a2, 1
-; RV32I-NEXT: or s0, a1, a0
-; RV32I-NEXT: or s5, a3, a2
+; RV32I-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 65536
+; RV32I-NEXT: and a0, t2, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, a7, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 28
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 512(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 28
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 508(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 131072
+; RV32I-NEXT: and a0, t2, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, a7, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 29
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 372(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 29
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 376(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 262144
+; RV32I-NEXT: and a0, t2, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, a7, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 30
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 364(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 30
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 368(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t2, 1
+; RV32I-NEXT: srli a1, t2, 31
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a2, a7, 1
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t1, 31
+; RV32I-NEXT: and a5, a0, t1
+; RV32I-NEXT: sw a5, 352(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and t2, a2, t0
+; RV32I-NEXT: and t1, a0, t0
+; RV32I-NEXT: slli t0, t0, 31
+; RV32I-NEXT: srli a0, a7, 31
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a3, a1, a3
+; RV32I-NEXT: sw a3, 380(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, t0
+; RV32I-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a1, t0
+; RV32I-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a6, 2
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s5, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s5, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s8, a0, s1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 1
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 340(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 1
+; RV32I-NEXT: and t0, a1, a2
+; RV32I-NEXT: and a7, a0, a2
+; RV32I-NEXT: andi a0, a6, 4
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s5, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 2
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 320(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 2
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 328(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a6, 8
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s5, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s8, s8, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 3
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 304(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 3
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a6, 16
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s5, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 4
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 276(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 4
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 292(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a6, 32
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s5, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 5
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 256(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 5
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a6, 64
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s5, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s8, s8, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 6
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 316(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 6
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 324(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a6, 128
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s5, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 7
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 232(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 7
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 244(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a6, 256
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s5, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 8
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 216(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 8
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a6, 512
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s5, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 9
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 252(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 9
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a6, 1024
+; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: andi a1, s5, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: and a1, s5, s9
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s5, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s8, a0
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: lw a3, 280(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: and a3, a0, a3
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s11
-; RV32I-NEXT: and a1, a1, s11
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s10
-; RV32I-NEXT: and a1, a1, s10
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lw a2, 284(sp) # 4-byte Folded Reload
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 10
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 336(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 10
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 344(sp) # 4-byte Folded Spill
; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: lw a2, 128(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a4, 436(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a6, a4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a4
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 11
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 164(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 11
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 168(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a0, a2
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 12
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 144(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 12
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 160(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 172(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 2
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 13
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 188(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 13
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 224(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 4
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 14
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 248(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 14
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 8
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 15
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 280(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 15
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 288(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 16
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 84(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 16
; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: andi a1, s7, 2
-; RV32I-NEXT: sw a1, 284(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 1
-; RV32I-NEXT: sw a1, 280(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, a0, s0
-; RV32I-NEXT: andi a1, s7, 4
-; RV32I-NEXT: sw a1, 276(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 8
-; RV32I-NEXT: sw a1, 272(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 16
-; RV32I-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 32
-; RV32I-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 64
-; RV32I-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 128
-; RV32I-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 256
-; RV32I-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 512
-; RV32I-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 1024
-; RV32I-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: and a1, s7, s9
-; RV32I-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s11, s9
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s7, a0
; RV32I-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s7, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 32
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 17
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 17
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s5, s1, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s7, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 64
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 18
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 120(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 18
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 140(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 128
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 19
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 176(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 19
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 180(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 256
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 20
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 192(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 20
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 196(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 204(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 512
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 21
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 208(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 21
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 220(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 1024
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 22
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 22
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s7, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 2048
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 23
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 23
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s0, a0
-; RV32I-NEXT: lui s0, 32768
-; RV32I-NEXT: and a1, s7, s0
-; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s10, s9, a0
-; RV32I-NEXT: lui s9, 65536
-; RV32I-NEXT: and a1, s7, s9
-; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s10, a0
-; RV32I-NEXT: xor s8, s5, a0
-; RV32I-NEXT: lui s10, 131072
-; RV32I-NEXT: and a1, s7, s10
-; RV32I-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s5, a0
-; RV32I-NEXT: lui s5, 524288
-; RV32I-NEXT: and a1, s7, s5
-; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s7, s8, a0
-; RV32I-NEXT: andi a1, s6, 2
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s6, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, a0, s4
-; RV32I-NEXT: andi a1, s6, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s6, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s6, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s6, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s6, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s6, 128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s6, 256
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s6, 512
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s6, 1024
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: and a1, s6, s11
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: and a1, s6, s0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: and a1, s6, s9
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: and a1, s6, s10
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 4096
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 24
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, s9
+; RV32I-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, s9
+; RV32I-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 8192
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 25
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 80(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 25
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 16384
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 26
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 104(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 26
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 32768
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 27
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 124(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 27
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 136(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 65536
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s4, 28
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 152(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t3, 28
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 148(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 156(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 131072
+; RV32I-NEXT: and a0, a6, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s5, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a2, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a0, s4, 29
+; RV32I-NEXT: and s11, a2, a0
+; RV32I-NEXT: slli a3, t3, 29
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: and a1, s6, s5
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: and a2, a6, a0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: and s1, s5, a0
+; RV32I-NEXT: seqz s1, s1
+; RV32I-NEXT: addi a0, a2, -1
+; RV32I-NEXT: addi s1, s1, -1
+; RV32I-NEXT: slli a5, s4, 30
+; RV32I-NEXT: and s8, a0, a5
+; RV32I-NEXT: slli a2, t3, 30
+; RV32I-NEXT: and a5, s1, a2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a6, 1
+; RV32I-NEXT: srli a6, a6, 31
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi ra, s5, 1
+; RV32I-NEXT: seqz ra, ra
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi ra, ra, -1
+; RV32I-NEXT: slli t5, s4, 31
+; RV32I-NEXT: and s4, a0, s4
+; RV32I-NEXT: and ra, ra, t3
+; RV32I-NEXT: and a2, a0, t3
+; RV32I-NEXT: slli t3, t3, 31
+; RV32I-NEXT: srli a3, s5, 31
+; RV32I-NEXT: seqz a6, a6
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi s5, a6, -1
+; RV32I-NEXT: addi a6, a3, -1
+; RV32I-NEXT: and a0, s5, t5
+; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a6, t3
+; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s5, t3
+; RV32I-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, s2
+; RV32I-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, s3, s10
+; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 444(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, t2, t6
+; RV32I-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, s0, s6
+; RV32I-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, a0, s7
-; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: xor s6, a1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
+; RV32I-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 320(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, a0, a1
+; RV32I-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a1, 256(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, a0, s4
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 188(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 180(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 176(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 172(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 168(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 164(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 160(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 156(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 152(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 148(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s3
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 140(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s4, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 284(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 280(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, a0, s3
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 276(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 272(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: xor s10, a0, a1
+; RV32I-NEXT: xor a0, s11, s8
+; RV32I-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor s11, ra, t0
+; RV32I-NEXT: lw a0, 328(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 308(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, a0, a1
+; RV32I-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 268(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, a0, a1
+; RV32I-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 228(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, a0, s0
+; RV32I-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 160(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, a0, s1
+; RV32I-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, a0, a1
+; RV32I-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, a0, a1
+; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, a0, a5
+; RV32I-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, t1, a0
+; RV32I-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t4, a0
+; RV32I-NEXT: lw a0, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, s7, a0
+; RV32I-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a0, a1
+; RV32I-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, a0, a1
+; RV32I-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, a0, a1
+; RV32I-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 416(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, a0, a1
+; RV32I-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, a0, a1
+; RV32I-NEXT: xor a7, a2, a7
+; RV32I-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 312(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 296(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw a2, 244(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 240(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: lw a3, 172(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a4, a3
+; RV32I-NEXT: lw a4, 112(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 96(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a5, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a6, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a6
+; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, s7
+; RV32I-NEXT: lw s7, 352(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s7, s8
+; RV32I-NEXT: sw s7, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 484(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 528(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 420(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 456(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 380(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 444(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 408(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s7, s8
+; RV32I-NEXT: sw s7, 476(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 460(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 456(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 452(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 496(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 400(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 444(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 428(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 412(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 428(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 388(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 404(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, s7, s9
+; RV32I-NEXT: lw s7, 316(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 376(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 252(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 372(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 188(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 368(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 120(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 364(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, s10, s7
+; RV32I-NEXT: lw s7, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 340(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s8, s7
+; RV32I-NEXT: sw s7, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor s8, s11, ra
+; RV32I-NEXT: lw s7, 324(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s6, s7
+; RV32I-NEXT: sw s6, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 264(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s0, s6
+; RV32I-NEXT: sw s0, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 200(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, s1, s0
+; RV32I-NEXT: lw s0, 132(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s2, s0
+; RV32I-NEXT: sw s0, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s3, s0
+; RV32I-NEXT: sw s0, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s4, s0
+; RV32I-NEXT: sw s0, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor t0, s5, t0
+; RV32I-NEXT: sw t0, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t1, t0
+; RV32I-NEXT: sw t0, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t2, t0
+; RV32I-NEXT: sw t0, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t3, t0
+; RV32I-NEXT: sw t0, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 516(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t4, t0
+; RV32I-NEXT: sw t0, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 432(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t5, t0
+; RV32I-NEXT: sw t0, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 396(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t6, t0
+; RV32I-NEXT: sw t0, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, a7, a0
+; RV32I-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a0
+; RV32I-NEXT: lw a0, 224(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a3, a0
+; RV32I-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a4, a0
+; RV32I-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a5, a0
+; RV32I-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a6, a0
+; RV32I-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a3, 61681
+; RV32I-NEXT: addi a3, a3, -241
+; RV32I-NEXT: sw a3, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, a0, 4
+; RV32I-NEXT: and t0, a0, a3
+; RV32I-NEXT: and a6, a6, a3
+; RV32I-NEXT: slli t0, t0, 4
+; RV32I-NEXT: or a0, a6, t0
+; RV32I-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, a0, 4
+; RV32I-NEXT: and t1, a0, a3
+; RV32I-NEXT: and a6, a6, a3
+; RV32I-NEXT: slli t1, t1, 4
+; RV32I-NEXT: or a0, a6, t1
+; RV32I-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, a1, a0
+; RV32I-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, a1, a0
+; RV32I-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 444(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli t2, a0, 4
+; RV32I-NEXT: and t3, a0, a3
+; RV32I-NEXT: and t2, t2, a3
+; RV32I-NEXT: slli t3, t3, 4
+; RV32I-NEXT: or a0, t2, t3
+; RV32I-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli t3, a0, 4
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: and t3, t3, a3
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: or a0, t3, a0
+; RV32I-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, s9, a0
+; RV32I-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, a1, a0
+; RV32I-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, a1, a0
+; RV32I-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, a1, a0
+; RV32I-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s10, a0
+; RV32I-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, s8, a0
+; RV32I-NEXT: lw a0, 344(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 628(sp) # 4-byte Folded Reload
; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
+; RV32I-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw s4, 0(a0)
-; RV32I-NEXT: lw s0, 132(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, a1, a0
+; RV32I-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, a1, a0
+; RV32I-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, a0, a1
+; RV32I-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a1, a0
+; RV32I-NEXT: lw a0, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, a1, a0
+; RV32I-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a1, a0
+; RV32I-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a1, a0
+; RV32I-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a0, a1
+; RV32I-NEXT: lw a0, 360(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a2, a0
+; RV32I-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 500(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a0
+; RV32I-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a1, a0
+; RV32I-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 484(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a0
+; RV32I-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, a0, ra
+; RV32I-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: lw a2, 468(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, s8, a2
+; RV32I-NEXT: lw s8, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, s8, s11
+; RV32I-NEXT: lw s8, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: lw s9, 564(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, s10, s9
+; RV32I-NEXT: lw s10, 472(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, s7, s10
+; RV32I-NEXT: lw s7, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, s7, s5
+; RV32I-NEXT: lw s7, 280(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s6, s7
+; RV32I-NEXT: lw s7, 192(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s2, s7
+; RV32I-NEXT: lw s7, 104(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s3, s7
+; RV32I-NEXT: xor s0, s4, s0
+; RV32I-NEXT: lw s4, 288(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, s1, s4
+; RV32I-NEXT: lw s4, 196(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, s4
+; RV32I-NEXT: lw s4, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, s4
+; RV32I-NEXT: xor t2, t6, t2
+; RV32I-NEXT: lw t6, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, t6
+; RV32I-NEXT: lw t6, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, t6
+; RV32I-NEXT: lw t6, 492(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, t6
+; RV32I-NEXT: xor a4, t1, a4
+; RV32I-NEXT: lw t1, 300(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t1
+; RV32I-NEXT: lw t1, 204(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, t1
+; RV32I-NEXT: lw t1, 116(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, t1
+; RV32I-NEXT: xor a0, ra, a0
+; RV32I-NEXT: lw t1, 576(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, t1
+; RV32I-NEXT: lw t1, 480(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a2, t1
+; RV32I-NEXT: xor t6, s11, s8
+; RV32I-NEXT: lw a2, 580(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, s9, a2
+; RV32I-NEXT: lw a2, 488(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s10, a2
+; RV32I-NEXT: xor s5, s5, s6
+; RV32I-NEXT: lw a2, 208(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s2, a2
+; RV32I-NEXT: lw a2, 124(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s3, a2
+; RV32I-NEXT: xor s0, s0, s1
+; RV32I-NEXT: lw a2, 212(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, a2
+; RV32I-NEXT: lw a2, 128(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, a2
+; RV32I-NEXT: xor t2, t2, t3
+; RV32I-NEXT: lw a2, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, a2
+; RV32I-NEXT: lw a2, 504(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, a2
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a2, 220(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a6, a2
+; RV32I-NEXT: lw a2, 136(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a2
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi s8, a2, 819
+; RV32I-NEXT: lw a2, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, a2, 2
+; RV32I-NEXT: and t3, a2, s8
+; RV32I-NEXT: and a6, a6, s8
+; RV32I-NEXT: slli t3, t3, 2
+; RV32I-NEXT: or a6, a6, t3
+; RV32I-NEXT: lw a2, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli t3, a2, 2
+; RV32I-NEXT: and s1, a2, s8
+; RV32I-NEXT: and t3, t3, s8
+; RV32I-NEXT: slli s1, s1, 2
+; RV32I-NEXT: or t3, t3, s1
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, t1, a1
+; RV32I-NEXT: xor t1, t6, s4
+; RV32I-NEXT: lw a2, 508(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, s7, a2
+; RV32I-NEXT: lw a2, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli s1, a2, 2
+; RV32I-NEXT: and s4, a2, s8
+; RV32I-NEXT: and s1, s1, s8
+; RV32I-NEXT: slli s4, s4, 2
+; RV32I-NEXT: or s1, s1, s4
+; RV32I-NEXT: lw a2, 720(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli s4, a2, 2
+; RV32I-NEXT: and s6, a2, s8
+; RV32I-NEXT: and s4, s4, s8
+; RV32I-NEXT: slli s6, s6, 2
+; RV32I-NEXT: or s4, s4, s6
+; RV32I-NEXT: xor s2, s5, s2
+; RV32I-NEXT: lw a2, 152(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s3, a2
+; RV32I-NEXT: xor t4, s0, t4
+; RV32I-NEXT: lw a2, 148(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, a2
+; RV32I-NEXT: xor a7, t2, a7
+; RV32I-NEXT: lw a2, 524(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, a2
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a2, 156(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a2
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a1, t1, t6
+; RV32I-NEXT: xor a5, s2, s3
+; RV32I-NEXT: xor t1, t4, t5
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: xor a3, a4, a3
+; RV32I-NEXT: lw a2, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: lw a2, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a1, a2
+; RV32I-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a1
+; RV32I-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t1, a1
+; RV32I-NEXT: lw a1, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a7, a1
+; RV32I-NEXT: sw a1, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a3, a1
+; RV32I-NEXT: sw a1, 720(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s2, 349525
+; RV32I-NEXT: addi s2, s2, 1365
+; RV32I-NEXT: srli a1, a6, 1
+; RV32I-NEXT: and a3, a6, s2
+; RV32I-NEXT: and a1, a1, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: xor a0, t0, a0
+; RV32I-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a0, s1, 1
+; RV32I-NEXT: and s1, s1, s2
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: slli s1, s1, 1
+; RV32I-NEXT: or a4, a0, s1
+; RV32I-NEXT: xor a0, t1, a5
+; RV32I-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a0, t3, 1
+; RV32I-NEXT: and a3, t3, s2
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: srli a3, a3, 31
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 31
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 700(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a3, s4, 1
+; RV32I-NEXT: and a5, s4, s2
+; RV32I-NEXT: and a3, a3, s2
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: or t1, a3, a5
+; RV32I-NEXT: srli a5, a5, 31
+; RV32I-NEXT: seqz a3, a5
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a4, 31
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 2
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 1
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 4
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 2
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 8
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 3
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 16
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 4
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 32
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 5
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 668(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 64
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 6
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 128
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 7
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 256
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 8
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 652(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 512
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 9
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 1024
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 10
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a2, 436(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a3, a0, a2
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 11
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a6, 1
+; RV32I-NEXT: and a3, a0, a6
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 12
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a7, 2
+; RV32I-NEXT: and a3, a0, a7
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 13
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t0, 4
+; RV32I-NEXT: and a3, a0, t0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 14
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t2, 8
+; RV32I-NEXT: and a3, a0, t2
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 15
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s9, 16
+; RV32I-NEXT: and a3, a0, s9
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 16
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t4, 32
+; RV32I-NEXT: and a3, a0, t4
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 17
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t5, 64
+; RV32I-NEXT: and a3, a0, t5
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 18
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t6, 128
+; RV32I-NEXT: and a3, a0, t6
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 19
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s0, 256
+; RV32I-NEXT: and a3, a0, s0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 20
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s1, 512
+; RV32I-NEXT: and a3, a0, s1
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 21
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s3, 1024
+; RV32I-NEXT: and a3, a0, s3
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 22
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s4, 2048
+; RV32I-NEXT: and a3, a0, s4
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 23
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s5, 4096
+; RV32I-NEXT: and a3, a0, s5
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 24
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s6, 8192
+; RV32I-NEXT: and a3, a0, s6
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 25
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s7, 16384
+; RV32I-NEXT: and a3, a0, s7
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 26
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a3, 32768
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 27
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a3, 65536
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 28
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a3, 131072
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, a1, 29
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: sw a3, 580(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a3, a0, 1
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: and a3, a3, a1
+; RV32I-NEXT: sw a3, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a1, a1, 30
+; RV32I-NEXT: lui t3, 262144
+; RV32I-NEXT: and a0, a0, t3
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t1, 2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t1, 4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 2
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t1, 8
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 3
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t1, 16
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 4
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t1, 32
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 5
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t1, 64
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 6
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t1, 128
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 7
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t1, 256
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 8
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t1, 512
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 9
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t1, 1024
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 10
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t1, a2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 11
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t1, a6
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 12
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t1, a7
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 13
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t1, t0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 14
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t1, t2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 15
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t1, s9
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 16
+; RV32I-NEXT: and s10, a0, a1
+; RV32I-NEXT: and a0, t1, t4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 17
+; RV32I-NEXT: and s9, a0, a1
+; RV32I-NEXT: and a0, t1, t5
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 18
+; RV32I-NEXT: and s11, a0, a1
+; RV32I-NEXT: and a0, t1, t6
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 19
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t1, s0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli s0, a4, 20
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t1, s1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli s1, a4, 21
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t1, s3
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, a4, 22
+; RV32I-NEXT: and s1, a0, a1
+; RV32I-NEXT: and a1, t1, s4
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a3, a4, 23
+; RV32I-NEXT: and s0, a1, a3
+; RV32I-NEXT: and a1, t1, s5
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a5, a4, 24
+; RV32I-NEXT: and s3, a1, a5
+; RV32I-NEXT: and a5, t1, s6
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a4, 25
+; RV32I-NEXT: and s4, a5, a6
+; RV32I-NEXT: and a6, t1, s7
+; RV32I-NEXT: seqz a6, a6
+; RV32I-NEXT: addi a6, a6, -1
+; RV32I-NEXT: slli a7, a4, 26
+; RV32I-NEXT: and s5, a6, a7
+; RV32I-NEXT: lui a0, 32768
+; RV32I-NEXT: and a6, t1, a0
+; RV32I-NEXT: seqz a6, a6
+; RV32I-NEXT: addi a6, a6, -1
+; RV32I-NEXT: slli t2, a4, 27
+; RV32I-NEXT: and s7, a6, t2
+; RV32I-NEXT: lui a0, 65536
+; RV32I-NEXT: and a6, t1, a0
+; RV32I-NEXT: seqz a6, a6
+; RV32I-NEXT: addi a6, a6, -1
+; RV32I-NEXT: slli t0, a4, 28
+; RV32I-NEXT: and s6, a6, t0
+; RV32I-NEXT: lui a0, 131072
+; RV32I-NEXT: and t0, t1, a0
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: slli a2, a4, 29
+; RV32I-NEXT: and t6, t0, a2
+; RV32I-NEXT: and t0, t1, t3
+; RV32I-NEXT: andi t1, t1, 1
+; RV32I-NEXT: seqz t1, t1
+; RV32I-NEXT: addi t1, t1, -1
+; RV32I-NEXT: and t1, t1, a4
+; RV32I-NEXT: slli a4, a4, 30
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: and t4, t0, a4
+; RV32I-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, a1, a0
+; RV32I-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a0, a1
+; RV32I-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a0, a1
+; RV32I-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 652(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a0, a1
+; RV32I-NEXT: lw a0, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, a0, a1
+; RV32I-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a4, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a0, a4
+; RV32I-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a0, a5
+; RV32I-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a0, a1
+; RV32I-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t1, a0
+; RV32I-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw a2, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 532(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: lw a3, 516(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw ra, 512(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, ra
+; RV32I-NEXT: xor s9, s10, s9
+; RV32I-NEXT: xor s0, s1, s0
+; RV32I-NEXT: xor t4, t6, t4
+; RV32I-NEXT: xor a7, t5, a7
+; RV32I-NEXT: lw t5, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, t5
+; RV32I-NEXT: lw t5, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, t2, t5
+; RV32I-NEXT: lw t5, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, t5
+; RV32I-NEXT: lw t5, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t5
+; RV32I-NEXT: lw t5, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t5
+; RV32I-NEXT: lw t5, 700(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, t5
+; RV32I-NEXT: xor a0, t1, a0
+; RV32I-NEXT: lw t1, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, t1
+; RV32I-NEXT: lw t1, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, t1
+; RV32I-NEXT: lw t1, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, t1
+; RV32I-NEXT: xor t1, s9, s11
+; RV32I-NEXT: xor t5, s0, s3
+; RV32I-NEXT: lw t6, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, t6
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t2, t0
+; RV32I-NEXT: lw t2, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, t3, t2
+; RV32I-NEXT: lw t3, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t3
+; RV32I-NEXT: lw t3, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t3
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: lw a2, 524(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a3, a2
+; RV32I-NEXT: lw a3, 500(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, t1, a3
+; RV32I-NEXT: xor t1, t5, s4
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t2, t0
+; RV32I-NEXT: lw t2, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t2
+; RV32I-NEXT: lw t2, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t2
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: lw a2, 504(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a3, a2
+; RV32I-NEXT: xor a3, t1, s5
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t0
+; RV32I-NEXT: lw t0, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t0
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: xor a2, a3, s7
+; RV32I-NEXT: xor a3, a7, a4
+; RV32I-NEXT: lw a4, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a5, a4
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a1, a2, s6
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a1, a3, a6
+; RV32I-NEXT: xor a0, a0, t4
+; RV32I-NEXT: srli a2, a1, 8
+; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: slli a4, a1, 24
+; RV32I-NEXT: lw a5, 724(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: and a2, a2, a5
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: srli a3, a0, 8
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: srli a4, a0, 24
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: or a3, a3, a4
+; RV32I-NEXT: and a4, a0, a5
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: srli a2, a1, 4
+; RV32I-NEXT: lw a4, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: srli a3, a0, 4
+; RV32I-NEXT: and a0, a0, a4
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: slli a1, a1, 4
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srli a2, a1, 2
+; RV32I-NEXT: and a1, a1, s8
+; RV32I-NEXT: srli a3, a0, 2
+; RV32I-NEXT: and a0, a0, s8
+; RV32I-NEXT: and a2, a2, s8
+; RV32I-NEXT: and a3, a3, s8
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a2, a2, 1364
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srli a3, a1, 1
+; RV32I-NEXT: and a1, a1, s2
+; RV32I-NEXT: and a4, a0, s2
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: and a3, a3, a2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: or a1, a3, a1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: srli a1, a1, 1
+; RV32I-NEXT: lw s0, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, a1, s0
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: lw s1, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, a0, s1
+; RV32I-NEXT: lw a0, 728(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw s3, 0(a0)
; RV32I-NEXT: sw s0, 4(a0)
-; RV32I-NEXT: sw s1, 8(a0)
-; RV32I-NEXT: sw s6, 12(a0)
-; RV32I-NEXT: addi a0, sp, 288
-; RV32I-NEXT: sw s4, 288(sp)
-; RV32I-NEXT: sw s0, 292(sp)
-; RV32I-NEXT: sw s1, 296(sp)
-; RV32I-NEXT: sw s6, 300(sp)
+; RV32I-NEXT: lw s2, 720(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw s2, 8(a0)
+; RV32I-NEXT: sw s1, 12(a0)
+; RV32I-NEXT: addi a0, sp, 736
+; RV32I-NEXT: sw s3, 736(sp)
+; RV32I-NEXT: sw s0, 740(sp)
+; RV32I-NEXT: sw s2, 744(sp)
+; RV32I-NEXT: sw s1, 748(sp)
; RV32I-NEXT: call vector_use
-; RV32I-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw s4, 0(a0)
+; RV32I-NEXT: lw a0, 732(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw s3, 0(a0)
; RV32I-NEXT: sw s0, 4(a0)
-; RV32I-NEXT: sw s1, 8(a0)
-; RV32I-NEXT: sw s6, 12(a0)
-; RV32I-NEXT: lw ra, 364(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 360(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 356(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 352(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 348(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 344(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 340(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 336(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 332(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 328(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s9, 324(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s10, 320(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s11, 316(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 368
+; RV32I-NEXT: sw s2, 8(a0)
+; RV32I-NEXT: sw s1, 12(a0)
+; RV32I-NEXT: lw ra, 812(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 808(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 804(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 800(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 796(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 792(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 788(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 784(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 780(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 776(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 772(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 768(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 764(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 816
; RV32I-NEXT: ret
;
; RV64I-LABEL: mul_use_commutative_clmul_v2i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -352
-; RV64I-NEXT: sd ra, 344(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 336(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 328(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 320(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 312(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 304(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 296(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 288(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 280(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 272(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 264(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s10, 256(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s11, 248(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a5, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -1088
+; RV64I-NEXT: sd ra, 1080(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 1072(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 1064(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 1056(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 1048(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 1040(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 1032(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 1024(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 1016(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 1008(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 1000(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 992(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 984(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a5, 976(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a4, 968(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv ra, a1
+; RV64I-NEXT: mv s11, a0
+; RV64I-NEXT: slli t0, a0, 1
+; RV64I-NEXT: andi s10, a2, 2
+; RV64I-NEXT: slli a4, a0, 2
+; RV64I-NEXT: andi s5, a2, 4
+; RV64I-NEXT: slli a5, a0, 3
+; RV64I-NEXT: andi s6, a2, 8
+; RV64I-NEXT: slli a6, a0, 4
+; RV64I-NEXT: andi t4, a2, 16
+; RV64I-NEXT: slli a7, a0, 5
+; RV64I-NEXT: andi t6, a2, 32
+; RV64I-NEXT: slli a0, a0, 6
+; RV64I-NEXT: andi t3, a2, 64
+; RV64I-NEXT: slli t5, s11, 7
+; RV64I-NEXT: andi s3, a2, 128
+; RV64I-NEXT: slli t2, s11, 8
+; RV64I-NEXT: andi s2, a2, 256
+; RV64I-NEXT: slli a1, s11, 9
+; RV64I-NEXT: andi s4, a2, 512
+; RV64I-NEXT: slli t1, s11, 10
+; RV64I-NEXT: andi s0, a2, 1024
+; RV64I-NEXT: slli s1, s11, 31
+; RV64I-NEXT: sraiw s7, a2, 31
+; RV64I-NEXT: seqz s10, s10
+; RV64I-NEXT: addi s10, s10, -1
+; RV64I-NEXT: and t0, s10, t0
+; RV64I-NEXT: slli s10, s11, 63
+; RV64I-NEXT: seqz s5, s5
+; RV64I-NEXT: addi s5, s5, -1
+; RV64I-NEXT: and s5, s5, a4
+; RV64I-NEXT: srli s8, a2, 63
+; RV64I-NEXT: seqz a4, s6
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: slli s6, ra, 1
+; RV64I-NEXT: seqz a5, t4
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: and a5, a5, a6
+; RV64I-NEXT: andi t4, a3, 2
+; RV64I-NEXT: seqz a6, t6
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: slli t6, ra, 2
+; RV64I-NEXT: seqz a7, t3
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: and a7, a7, a0
+; RV64I-NEXT: sd a7, 904(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t3, a3, 4
+; RV64I-NEXT: seqz a7, s3
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: and a0, a7, t5
+; RV64I-NEXT: slli t5, ra, 3
+; RV64I-NEXT: seqz a7, s2
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: and a7, a7, t2
+; RV64I-NEXT: andi t2, a3, 8
+; RV64I-NEXT: seqz s2, s4
+; RV64I-NEXT: addi s2, s2, -1
+; RV64I-NEXT: and a1, s2, a1
+; RV64I-NEXT: sd a1, 896(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a1, ra, 4
+; RV64I-NEXT: seqz s0, s0
+; RV64I-NEXT: addi s0, s0, -1
+; RV64I-NEXT: and t1, s0, t1
+; RV64I-NEXT: sd t1, 944(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t1, a3, 16
+; RV64I-NEXT: seqz s0, s7
+; RV64I-NEXT: addi s0, s0, -1
+; RV64I-NEXT: and s0, s0, s1
+; RV64I-NEXT: sd s0, 952(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s0, ra, 5
+; RV64I-NEXT: seqz s1, s8
+; RV64I-NEXT: addi s1, s1, -1
+; RV64I-NEXT: and s1, s1, s10
+; RV64I-NEXT: sd s1, 960(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi s1, a3, 32
+; RV64I-NEXT: seqz t4, t4
+; RV64I-NEXT: addi t4, t4, -1
+; RV64I-NEXT: and t4, t4, s6
+; RV64I-NEXT: sd t4, 880(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t4, ra, 6
+; RV64I-NEXT: seqz t3, t3
+; RV64I-NEXT: addi t3, t3, -1
+; RV64I-NEXT: and t3, t3, t6
+; RV64I-NEXT: sd t3, 872(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t3, a3, 64
+; RV64I-NEXT: seqz t2, t2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and t2, t2, t5
+; RV64I-NEXT: sd t2, 864(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t2, ra, 7
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: and a1, t1, a1
+; RV64I-NEXT: sd a1, 856(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t1, a3, 128
+; RV64I-NEXT: seqz t5, s1
+; RV64I-NEXT: addi t5, t5, -1
+; RV64I-NEXT: and a1, t5, s0
+; RV64I-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t5, ra, 8
+; RV64I-NEXT: seqz t3, t3
+; RV64I-NEXT: addi t3, t3, -1
+; RV64I-NEXT: and a1, t3, t4
+; RV64I-NEXT: sd a1, 920(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t3, a3, 256
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: and a1, t1, t2
+; RV64I-NEXT: sd a1, 832(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t1, ra, 9
+; RV64I-NEXT: seqz t2, t3
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and a1, t2, t5
+; RV64I-NEXT: sd a1, 808(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t2, a3, 512
+; RV64I-NEXT: seqz t2, t2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and a1, t2, t1
+; RV64I-NEXT: sd a1, 912(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t1, a3, 1024
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: slli t2, ra, 10
+; RV64I-NEXT: and a1, t1, t2
+; RV64I-NEXT: sd a1, 936(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sraiw t1, a3, 31
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: slli t2, ra, 31
+; RV64I-NEXT: and a1, t1, t2
+; RV64I-NEXT: sd a1, 928(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srli t1, a3, 63
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: slli t2, ra, 63
+; RV64I-NEXT: and a1, t1, t2
+; RV64I-NEXT: sd a1, 888(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi t1, a2, 1
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: and t1, t1, s11
+; RV64I-NEXT: xor a1, t1, t0
+; RV64I-NEXT: sd a1, 688(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a1, s5, a4
+; RV64I-NEXT: sd a1, 680(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a1, a5, a6
+; RV64I-NEXT: sd a1, 672(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a1, a0, a7
+; RV64I-NEXT: sd a1, 664(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 12
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 816(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 13
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 840(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 14
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 8
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 15
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 16
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 784(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 17
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 800(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 64
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 18
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 824(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 128
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 19
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 256
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 20
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 504(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 512
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 21
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 760(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1024
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 22
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 768(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2048
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 23
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 776(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4096
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 24
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 792(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui s10, 8192
+; RV64I-NEXT: and a4, a2, s10
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 25
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16384
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 26
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32768
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 27
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 728(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 65536
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 28
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 736(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 131072
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 29
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 744(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 262144
+; RV64I-NEXT: and a4, a2, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s11, 30
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 752(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a4, 1
+; RV64I-NEXT: slli a1, a4, 11
+; RV64I-NEXT: sd a1, 600(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a5, a4, 32
+; RV64I-NEXT: slli a6, a4, 33
+; RV64I-NEXT: sd a6, 584(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, a4, 34
+; RV64I-NEXT: slli a7, a4, 35
+; RV64I-NEXT: sd a7, 640(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a7, a4, 36
+; RV64I-NEXT: sd a7, 576(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t0, a4, 37
+; RV64I-NEXT: sd t0, 624(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t1, a4, 38
+; RV64I-NEXT: sd t1, 568(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t0, a4, 39
+; RV64I-NEXT: slli t2, a4, 40
+; RV64I-NEXT: sd t2, 560(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t3, a4, 41
+; RV64I-NEXT: sd t3, 648(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s0, a4, 42
+; RV64I-NEXT: slli s1, a4, 43
+; RV64I-NEXT: slli t3, a4, 44
+; RV64I-NEXT: sd t3, 616(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t3, a4, 45
+; RV64I-NEXT: sd t3, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t4, a4, 46
+; RV64I-NEXT: sd t4, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t4, a4, 47
+; RV64I-NEXT: sd t4, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t5, a4, 48
+; RV64I-NEXT: sd t5, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t5, a4, 49
+; RV64I-NEXT: slli t6, a4, 50
+; RV64I-NEXT: sd t6, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t6, a4, 51
+; RV64I-NEXT: sd t6, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s2, a4, 52
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t6, a4, 53
+; RV64I-NEXT: slli s3, a4, 54
+; RV64I-NEXT: sd s3, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s3, a4, 55
+; RV64I-NEXT: slli s4, a4, 56
+; RV64I-NEXT: slli s5, a4, 57
+; RV64I-NEXT: slli s6, a4, 58
+; RV64I-NEXT: slli s7, a4, 59
+; RV64I-NEXT: slli s8, a4, 60
+; RV64I-NEXT: slli s9, a4, 61
+; RV64I-NEXT: slli a4, a4, 62
+; RV64I-NEXT: sd a4, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 8
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 64
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 128
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 256
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 512
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1024
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 720(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2048
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4096
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 472(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a3, s10
+; RV64I-NEXT: sd a4, 480(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16384
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 496(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32768
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 592(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 65536
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 656(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 131072
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 704(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 262144
+; RV64I-NEXT: and a4, a3, a4
+; RV64I-NEXT: sd a4, 712(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, a1
+; RV64I-NEXT: sd a4, 456(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, a5
+; RV64I-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, a6
+; RV64I-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, a0
+; RV64I-NEXT: sd a4, 160(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a6, 640(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, a6
+; RV64I-NEXT: sd a4, 168(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, a7
+; RV64I-NEXT: sd a4, 176(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a7, 624(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, a7
+; RV64I-NEXT: sd a4, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t1
+; RV64I-NEXT: sd a4, 192(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t0
+; RV64I-NEXT: sd a4, 296(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t2
+; RV64I-NEXT: sd a4, 280(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 648(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, a1
+; RV64I-NEXT: sd a4, 344(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s0
+; RV64I-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s1
+; RV64I-NEXT: sd a4, 200(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t2, 616(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, t2
+; RV64I-NEXT: sd a4, 208(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t3
+; RV64I-NEXT: sd a4, 216(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t3, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, t3
+; RV64I-NEXT: sd a4, 224(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t4
; RV64I-NEXT: sd a4, 232(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s3, a3
-; RV64I-NEXT: mv s5, a2
-; RV64I-NEXT: mv s2, a1
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, a2, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: andi a1, s5, 1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, a0, s6
-; RV64I-NEXT: andi a1, s5, 4
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: andi a1, s5, 8
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s6, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: andi a1, s5, 16
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: andi a1, s5, 32
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: andi a1, s5, 64
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: andi a1, s5, 128
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: andi a1, s5, 256
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: andi a1, s5, 512
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: andi a1, s5, 1024
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: li s9, 1
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 11
+; RV64I-NEXT: ld t4, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, t4
+; RV64I-NEXT: sd a4, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t5
+; RV64I-NEXT: mv s10, t5
+; RV64I-NEXT: sd a4, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t1, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, t1
+; RV64I-NEXT: sd a4, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld t5, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, t5
+; RV64I-NEXT: sd a4, 320(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s2
+; RV64I-NEXT: sd a4, 352(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, t6
+; RV64I-NEXT: sd a4, 376(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, a2, s2
+; RV64I-NEXT: sd a4, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s3
+; RV64I-NEXT: sd a4, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s4
+; RV64I-NEXT: sd a4, 272(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s5
+; RV64I-NEXT: sd a4, 288(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s6
+; RV64I-NEXT: sd a4, 304(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s7
+; RV64I-NEXT: sd a4, 312(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s8
+; RV64I-NEXT: sd a4, 328(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, a2, s9
+; RV64I-NEXT: sd a4, 336(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: sd a2, 360(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a2, 600(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 368(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a5, a3, a5
+; RV64I-NEXT: sd a5, 400(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a2, 584(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 424(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a3, a0
+; RV64I-NEXT: sd a2, 464(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a3, a6
+; RV64I-NEXT: sd a2, 488(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a2, 576(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 512(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a3, a7
+; RV64I-NEXT: sd a2, 520(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a2, 568(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 536(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a3, t0
+; RV64I-NEXT: sd a2, 544(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a2, 560(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 552(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: sd a1, 560(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and s0, a3, s0
+; RV64I-NEXT: sd s0, 568(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and s1, a3, s1
+; RV64I-NEXT: sd s1, 576(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a1, a3, t2
+; RV64I-NEXT: sd a1, 584(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: sd a1, 600(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a1, a3, t3
+; RV64I-NEXT: sd a1, 616(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: sd a1, 624(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a1, a3, t4
+; RV64I-NEXT: sd a1, 640(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a1, a3, s10
+; RV64I-NEXT: sd a1, 648(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and t0, a3, t1
+; RV64I-NEXT: and a0, a3, t5
+; RV64I-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a0, a3, a0
+; RV64I-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a3, t6
+; RV64I-NEXT: sd a0, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and t6, a3, s2
+; RV64I-NEXT: and s0, a3, s3
+; RV64I-NEXT: and s2, a3, s4
+; RV64I-NEXT: and t5, a3, s5
+; RV64I-NEXT: and t4, a3, s6
+; RV64I-NEXT: and t3, a3, s7
+; RV64I-NEXT: and a0, a3, s8
+; RV64I-NEXT: sd a0, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and t2, a3, s9
+; RV64I-NEXT: and a7, a3, a4
+; RV64I-NEXT: andi a3, a3, 1
+; RV64I-NEXT: seqz a2, a3
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a2, a2, ra
+; RV64I-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, a2, a1
+; RV64I-NEXT: ld a1, 872(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 864(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s3, a1, a2
+; RV64I-NEXT: ld a1, 856(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 848(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s4, a1, a2
+; RV64I-NEXT: ld a1, 832(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 808(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s10, a1, a2
+; RV64I-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 12
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 856(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 13
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 880(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 14
+; RV64I-NEXT: and s5, a2, a3
+; RV64I-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 15
+; RV64I-NEXT: and s6, a2, a3
+; RV64I-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 16
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 808(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 17
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 848(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 18
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 872(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a2, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 19
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a3, a1
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: slli a4, ra, 20
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: ld a1, 696(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 21
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 720(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 22
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 720(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 23
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 832(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 24
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 864(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 480(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a4, a1
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 25
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: ld a1, 496(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, ra, 26
+; RV64I-NEXT: and t1, a5, a6
+; RV64I-NEXT: ld a1, 592(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, ra, 27
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 592(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 656(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, ra, 28
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 656(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 704(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, ra, 29
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 704(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 712(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, ra, 30
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 712(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 688(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 680(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a5
+; RV64I-NEXT: sd a1, 496(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 904(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 672(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a5, a1
+; RV64I-NEXT: sd a1, 480(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 896(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 664(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a5, a1
+; RV64I-NEXT: sd a1, 472(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 11
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 456(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 632(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 608(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a5
+; RV64I-NEXT: sd a1, 448(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 528(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 504(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a5
+; RV64I-NEXT: sd a1, 504(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 432(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a5
+; RV64I-NEXT: sd a1, 440(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 32
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 33
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 34
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 608(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 35
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 672(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 36
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 688(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 37
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 896(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 38
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 904(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 39
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 40
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 41
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 42
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 43
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 528(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 44
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 632(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 45
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 664(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 46
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 680(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 47
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 48
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 49
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 50
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 51
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 52
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 53
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 54
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 55
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 432(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 56
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 57
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 58
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 59
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 60
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, s11, 61
+; RV64I-NEXT: and a1, a5, a6
+; RV64I-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, s11, 62
+; RV64I-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: and a0, a5, a0
+; RV64I-NEXT: sd a0, 360(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a6, s1, s3
+; RV64I-NEXT: ld a0, 920(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, s4, a0
+; RV64I-NEXT: sd a0, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 912(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, s10, a0
; RV64I-NEXT: sd a0, 224(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: lui a0, 2
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: lui a0, 8
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: lui a0, 16
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 32
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 64
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: lui a1, 128
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: lui a0, 256
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: lui a0, 512
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 1024
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 2048
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 4096
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: lui a1, 8192
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: lui a0, 16384
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: lui a0, 32768
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 65536
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 131072
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 262144
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 31
+; RV64I-NEXT: ld a0, 368(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a5, ra, 11
+; RV64I-NEXT: and a0, a0, a5
+; RV64I-NEXT: sd a0, 368(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a0, s5, s6
; RV64I-NEXT: sd a0, 216(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: slli a1, s9, 32
-; RV64I-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: slli a0, s9, 33
+; RV64I-NEXT: xor a2, a2, a3
+; RV64I-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor a0, a4, t1
; RV64I-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: slli a0, s9, 34
-; RV64I-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 35
-; RV64I-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 36
+; RV64I-NEXT: ld a0, 400(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 32
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 400(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 424(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 33
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 424(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 464(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 34
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 464(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 488(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 35
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 488(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 512(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 36
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 512(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 520(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 37
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 912(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 536(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 38
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 920(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 544(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 39
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 536(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 552(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 40
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 520(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 560(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 41
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 560(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 568(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli s1, ra, 42
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: sd a0, 568(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 576(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 43
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 576(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 584(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 44
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 584(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 600(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 45
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 600(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 616(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 46
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sd a0, 616(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 624(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a4, ra, 47
+; RV64I-NEXT: and s7, a0, a4
+; RV64I-NEXT: ld a0, 640(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a2, ra, 48
+; RV64I-NEXT: and s6, a0, a2
+; RV64I-NEXT: ld a0, 648(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 49
+; RV64I-NEXT: and a0, a0, a3
; RV64I-NEXT: sd a0, 176(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 37
-; RV64I-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 38
-; RV64I-NEXT: sd a0, 160(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: slli a1, s9, 39
-; RV64I-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: slli a0, s9, 40
-; RV64I-NEXT: sd a0, 144(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s6, a0
-; RV64I-NEXT: slli a0, s9, 41
-; RV64I-NEXT: sd a0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 42
-; RV64I-NEXT: sd a0, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 43
-; RV64I-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 44
-; RV64I-NEXT: sd a0, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 45
-; RV64I-NEXT: sd a0, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: slli a0, s9, 46
-; RV64I-NEXT: sd a0, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: slli a1, s9, 47
-; RV64I-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s1, s0, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: slli a0, s9, 48
-; RV64I-NEXT: sd a0, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s6, a0
-; RV64I-NEXT: slli a0, s9, 49
-; RV64I-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 50
-; RV64I-NEXT: sd a0, 64(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 51
-; RV64I-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 52
-; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 53
-; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 54
-; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: slli a0, s9, 55
-; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s0, a0
-; RV64I-NEXT: slli a1, s9, 56
-; RV64I-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s0, s1, a0
-; RV64I-NEXT: and a1, s5, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: slli a0, s9, 57
-; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s5, a0
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s7, s6, a0
-; RV64I-NEXT: slli s6, s9, 58
-; RV64I-NEXT: and a1, s5, s6
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s8, s7, a0
-; RV64I-NEXT: slli s7, s9, 59
-; RV64I-NEXT: and a1, s5, s7
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s8, s8, a0
-; RV64I-NEXT: slli s11, s9, 60
-; RV64I-NEXT: and a1, s5, s11
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s10, s8, a0
-; RV64I-NEXT: slli s8, s9, 61
-; RV64I-NEXT: and a1, s5, s8
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s10, s10, a0
-; RV64I-NEXT: slli s9, s9, 62
-; RV64I-NEXT: and a1, s5, s9
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s10, a0
-; RV64I-NEXT: li s10, -1
-; RV64I-NEXT: slli s10, s10, 63
-; RV64I-NEXT: and a1, s5, s10
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s4, s0, a0
-; RV64I-NEXT: andi a1, s3, 2
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s3, 1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s0, a0, s5
-; RV64I-NEXT: andi a1, s3, 4
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s3, 8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: andi a1, s3, 16
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s3, 32
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: andi a1, s3, 64
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: andi a1, s3, 128
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s3, 256
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: andi a1, s3, 512
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: andi a1, s3, 1024
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: ld a0, 224(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: lui a0, 2
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: lui a0, 4
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a0, 8
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: lui a0, 16
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 32
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 64
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: lui a0, 128
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a0, 256
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: lui a0, 512
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 1024
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 2048
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 4096
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: lui a0, 8192
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a0, 16384
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: lui a0, 32768
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 65536
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 131072
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: lui a0, 262144
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 216(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: ld a0, 208(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: ld a0, 200(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 184(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 176(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 160(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: ld a0, 144(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: ld a0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: ld a0, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: ld a0, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
+; RV64I-NEXT: seqz a0, t0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 50
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
; RV64I-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 51
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: sd a0, 544(sp) # 8-byte Folded Spill
; RV64I-NEXT: ld a0, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s0, s0, a0
-; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: and a1, s3, s6
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: and a1, s3, s7
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: and a1, s3, s11
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: and a1, s3, s8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: and a1, s3, s9
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: and a1, s3, s10
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s0, a0
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 52
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: sd a0, 552(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 53
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: sd a0, 624(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a0, t6
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 54
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: sd a0, 640(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a0, s0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli s0, ra, 55
+; RV64I-NEXT: and a0, a0, s0
+; RV64I-NEXT: sd a0, 648(sp) # 8-byte Folded Spill
+; RV64I-NEXT: seqz a0, s2
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli a3, ra, 56
+; RV64I-NEXT: and s4, a0, a3
+; RV64I-NEXT: seqz a2, t5
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, ra, 57
+; RV64I-NEXT: and s3, a2, a3
+; RV64I-NEXT: seqz a3, t4
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: slli a5, ra, 58
+; RV64I-NEXT: and s5, a3, a5
+; RV64I-NEXT: seqz a4, t3
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, ra, 59
+; RV64I-NEXT: and s8, a4, a5
+; RV64I-NEXT: ld a0, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz a5, a0
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli t0, ra, 60
+; RV64I-NEXT: and s10, a5, t0
+; RV64I-NEXT: seqz t0, t2
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: slli a1, ra, 61
+; RV64I-NEXT: and s9, t0, a1
+; RV64I-NEXT: slli t6, ra, 62
+; RV64I-NEXT: seqz a1, a7
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a0, a1, t6
+; RV64I-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 496(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 480(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s2, a0, a1
+; RV64I-NEXT: ld a0, 944(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, a1, a0
+; RV64I-NEXT: ld a0, 816(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, a1, a0
+; RV64I-NEXT: ld a0, 784(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, a1, a0
+; RV64I-NEXT: ld a0, 760(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 504(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, a1, a0
+; RV64I-NEXT: ld a0, 728(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, a1, a0
+; RV64I-NEXT: ld a0, 416(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 384(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, a0, s0
+; RV64I-NEXT: ld a0, 296(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 280(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, a0, s1
; RV64I-NEXT: ld a0, 232(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sd s4, 0(a0)
+; RV64I-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a0, a1
+; RV64I-NEXT: ld a0, 272(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, a0, a1
+; RV64I-NEXT: ld a0, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, a6, a0
+; RV64I-NEXT: ld a0, 936(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: ld a1, 856(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 368(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 808(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a3, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 696(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a4, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 592(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 400(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a6, 424(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a5, a6
+; RV64I-NEXT: ld a6, 536(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld ra, 520(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a6, ra
+; RV64I-NEXT: xor s7, s7, s6
+; RV64I-NEXT: xor s3, s4, s3
+; RV64I-NEXT: xor t2, s2, t2
+; RV64I-NEXT: ld s2, 840(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t3, s2
+; RV64I-NEXT: ld s2, 800(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t4, s2
+; RV64I-NEXT: ld s2, 768(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t5, s2
+; RV64I-NEXT: ld s2, 736(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, t6, s2
+; RV64I-NEXT: ld s2, 608(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, s0, s2
+; RV64I-NEXT: ld s2, 344(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, s1, s2
+; RV64I-NEXT: ld s2, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, s2
+; RV64I-NEXT: ld s2, 304(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, s2
+; RV64I-NEXT: xor a0, t1, a0
+; RV64I-NEXT: ld t1, 880(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, t1
+; RV64I-NEXT: ld t1, 848(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a2, t1
+; RV64I-NEXT: ld t1, 720(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a3, t1
+; RV64I-NEXT: ld t1, 656(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a4, t1
+; RV64I-NEXT: ld t1, 464(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a5, t1
+; RV64I-NEXT: ld t1, 560(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a6, t1
+; RV64I-NEXT: ld t1, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, s7, t1
+; RV64I-NEXT: xor s2, s3, s5
+; RV64I-NEXT: xor t2, t2, t3
+; RV64I-NEXT: ld t3, 824(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t4, t3
+; RV64I-NEXT: ld t4, 776(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t5, t4
+; RV64I-NEXT: ld t5, 744(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t6, t5
+; RV64I-NEXT: ld t6, 672(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, s0, t6
+; RV64I-NEXT: ld s0, 392(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, s1, s0
+; RV64I-NEXT: ld s1, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, s1
+; RV64I-NEXT: ld s1, 312(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, s1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 872(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 832(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 704(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 488(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 568(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: ld a6, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, t1, a6
+; RV64I-NEXT: xor t1, s2, s8
+; RV64I-NEXT: xor t2, t2, t3
+; RV64I-NEXT: ld t3, 792(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t4, t3
+; RV64I-NEXT: ld t4, 752(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t5, t4
+; RV64I-NEXT: ld t5, 688(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t6, t5
+; RV64I-NEXT: ld t6, 528(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, s0, t6
+; RV64I-NEXT: ld s0, 320(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, s0
+; RV64I-NEXT: ld s0, 328(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, s0
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 864(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 712(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 512(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 576(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 544(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: xor a6, t1, s10
+; RV64I-NEXT: xor t1, t2, t3
+; RV64I-NEXT: ld t2, 952(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t4, t2
+; RV64I-NEXT: ld t3, 896(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t5, t3
+; RV64I-NEXT: ld t4, 632(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t6, t4
+; RV64I-NEXT: ld t5, 352(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, t5
+; RV64I-NEXT: ld t5, 336(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, t5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 928(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 912(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 584(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 552(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: xor a5, a6, s9
+; RV64I-NEXT: xor a6, t1, t2
+; RV64I-NEXT: ld t1, 904(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t3, t1
+; RV64I-NEXT: ld t2, 664(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t4, t2
+; RV64I-NEXT: ld t3, 376(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, t3
+; RV64I-NEXT: ld t3, 360(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, t3
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 920(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 600(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 624(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: xor a5, a6, t1
+; RV64I-NEXT: ld a6, 680(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, t2, a6
+; RV64I-NEXT: ld t1, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, t1
+; RV64I-NEXT: ld t1, 960(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, t1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 616(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 640(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 888(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: xor a4, a5, a6
+; RV64I-NEXT: ld a5, 432(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a7, a5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 648(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: xor a4, a4, a5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: xor s1, a4, t0
+; RV64I-NEXT: xor s2, a0, a3
+; RV64I-NEXT: ld a0, 968(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sd s1, 0(a0)
; RV64I-NEXT: sd s2, 8(a0)
-; RV64I-NEXT: mv a0, s4
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s2
; RV64I-NEXT: call vector_use
-; RV64I-NEXT: ld a0, 240(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sd s4, 0(a0)
+; RV64I-NEXT: ld a0, 976(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sd s1, 0(a0)
; RV64I-NEXT: sd s2, 8(a0)
-; RV64I-NEXT: ld ra, 344(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 336(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 328(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 320(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 312(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 304(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 296(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 288(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 280(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 272(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 264(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s10, 256(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s11, 248(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 352
+; RV64I-NEXT: ld ra, 1080(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 1072(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 1064(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 1056(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 1048(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 1040(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 1032(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 1024(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 1016(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 1008(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 1000(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 992(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 984(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 1088
; RV64I-NEXT: ret
;
; RV32IM-LABEL: mul_use_commutative_clmul_v2i64:
>From e76a83e9793f621ef68e7790e3282a09bb220a25 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 19 Feb 2026 16:33:58 -0800
Subject: [PATCH 2/2] Add hasBitTest check and comments from #177566.
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 12 +-
llvm/test/CodeGen/RISCV/clmul.ll | 28246 ++++++++--------
llvm/test/CodeGen/RISCV/clmulh.ll | 25286 +++++++-------
llvm/test/CodeGen/RISCV/clmulr.ll | 25562 +++++++-------
llvm/test/CodeGen/X86/clmul.ll | 4602 ++-
5 files changed, 42266 insertions(+), 41442 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 62e58f18ff024..704e7d086ceb4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8474,17 +8474,23 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
SDValue Res = DAG.getConstant(0, DL, VT);
for (unsigned I = 0; I < BW; ++I) {
+ SDValue ShiftAmt = DAG.getShiftAmountConstant(I, VT, DL);
SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
+
+ // For targets with a fast bit test instruction (e.g., x86 BT) or without
+ // multiply, use a shift-based expansion to avoid expensive MUL
+ // instructions.
SDValue Part;
- if (isOperationLegalOrCustom(
+ if (!hasBitTest(Y, ShiftAmt) &&
+ isOperationLegalOrCustom(
ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
Part = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
} else {
+ // Canonical bit test: (Y & (1 << I)) != 0
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Cond = DAG.getSetCC(DL, SetCCVT, YMasked, Zero, ISD::SETNE);
- SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X,
- DAG.getShiftAmountConstant(I, VT, DL));
+ SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);
Part = DAG.getSelect(DL, VT, Cond, XShifted, Zero);
}
Res = DAG.getNode(ISD::XOR, DL, VT, Res, Part);
diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll
index 7ad597892d615..6930e2628909d 100644
--- a/llvm/test/CodeGen/RISCV/clmul.ll
+++ b/llvm/test/CodeGen/RISCV/clmul.ll
@@ -57,35 +57,99 @@ define i4 @clmul_i4(i4 %a, i4 %b) nounwind {
; RV64I-NEXT: xor a0, a2, a0
; RV64I-NEXT: ret
;
-; CHECK-M-LABEL: clmul_i4:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: andi a2, a1, 2
-; CHECK-M-NEXT: andi a3, a1, 1
-; CHECK-M-NEXT: andi a4, a1, 4
-; CHECK-M-NEXT: andi a1, a1, 8
-; CHECK-M-NEXT: mul a2, a0, a2
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: mul a0, a0, a1
-; CHECK-M-NEXT: xor a2, a3, a2
-; CHECK-M-NEXT: xor a0, a4, a0
-; CHECK-M-NEXT: xor a0, a2, a0
-; CHECK-M-NEXT: ret
+; RV32IM-LABEL: clmul_i4:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: slli a2, a1, 30
+; RV32IM-NEXT: slli a3, a0, 1
+; RV32IM-NEXT: slli a4, a1, 29
+; RV32IM-NEXT: srli a2, a2, 31
+; RV32IM-NEXT: neg a2, a2
+; RV32IM-NEXT: and a2, a2, a3
+; RV32IM-NEXT: slli a3, a0, 2
+; RV32IM-NEXT: srli a4, a4, 31
+; RV32IM-NEXT: neg a4, a4
+; RV32IM-NEXT: and a3, a4, a3
+; RV32IM-NEXT: slli a4, a1, 31
+; RV32IM-NEXT: slli a1, a1, 28
+; RV32IM-NEXT: srai a4, a4, 31
+; RV32IM-NEXT: and a4, a4, a0
+; RV32IM-NEXT: slli a0, a0, 3
+; RV32IM-NEXT: srli a1, a1, 31
+; RV32IM-NEXT: neg a1, a1
+; RV32IM-NEXT: and a0, a1, a0
+; RV32IM-NEXT: xor a2, a4, a2
+; RV32IM-NEXT: xor a0, a3, a0
+; RV32IM-NEXT: xor a0, a2, a0
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: clmul_i4:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: slli a2, a1, 62
+; RV64IM-NEXT: slli a3, a0, 1
+; RV64IM-NEXT: slli a4, a1, 61
+; RV64IM-NEXT: srli a2, a2, 63
+; RV64IM-NEXT: neg a2, a2
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: slli a3, a0, 2
+; RV64IM-NEXT: srli a4, a4, 63
+; RV64IM-NEXT: neg a4, a4
+; RV64IM-NEXT: and a3, a4, a3
+; RV64IM-NEXT: slli a4, a1, 63
+; RV64IM-NEXT: slli a1, a1, 60
+; RV64IM-NEXT: srai a4, a4, 63
+; RV64IM-NEXT: and a4, a4, a0
+; RV64IM-NEXT: slli a0, a0, 3
+; RV64IM-NEXT: srli a1, a1, 63
+; RV64IM-NEXT: neg a1, a1
+; RV64IM-NEXT: and a0, a1, a0
+; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: xor a0, a3, a0
+; RV64IM-NEXT: xor a0, a2, a0
+; RV64IM-NEXT: ret
+;
+; RV32IMZBS-LABEL: clmul_i4:
+; RV32IMZBS: # %bb.0:
+; RV32IMZBS-NEXT: bexti a2, a1, 1
+; RV32IMZBS-NEXT: slli a3, a0, 1
+; RV32IMZBS-NEXT: bexti a4, a1, 2
+; RV32IMZBS-NEXT: neg a2, a2
+; RV32IMZBS-NEXT: and a2, a2, a3
+; RV32IMZBS-NEXT: slli a3, a0, 2
+; RV32IMZBS-NEXT: neg a4, a4
+; RV32IMZBS-NEXT: and a3, a4, a3
+; RV32IMZBS-NEXT: slli a4, a1, 31
+; RV32IMZBS-NEXT: bexti a1, a1, 3
+; RV32IMZBS-NEXT: srai a4, a4, 31
+; RV32IMZBS-NEXT: and a4, a4, a0
+; RV32IMZBS-NEXT: slli a0, a0, 3
+; RV32IMZBS-NEXT: neg a1, a1
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: xor a2, a4, a2
+; RV32IMZBS-NEXT: xor a0, a3, a0
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: ret
;
-; CHECK-ZBS-LABEL: clmul_i4:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: andi a2, a1, 2
-; CHECK-ZBS-NEXT: andi a3, a1, 1
-; CHECK-ZBS-NEXT: andi a4, a1, 4
-; CHECK-ZBS-NEXT: andi a1, a1, 8
-; CHECK-ZBS-NEXT: mul a2, a0, a2
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: mul a0, a0, a1
-; CHECK-ZBS-NEXT: xor a2, a3, a2
-; CHECK-ZBS-NEXT: xor a0, a4, a0
-; CHECK-ZBS-NEXT: xor a0, a2, a0
-; CHECK-ZBS-NEXT: ret
+; RV64IMZBS-LABEL: clmul_i4:
+; RV64IMZBS: # %bb.0:
+; RV64IMZBS-NEXT: bexti a2, a1, 1
+; RV64IMZBS-NEXT: slli a3, a0, 1
+; RV64IMZBS-NEXT: bexti a4, a1, 2
+; RV64IMZBS-NEXT: neg a2, a2
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: slli a3, a0, 2
+; RV64IMZBS-NEXT: neg a4, a4
+; RV64IMZBS-NEXT: and a3, a4, a3
+; RV64IMZBS-NEXT: slli a4, a1, 63
+; RV64IMZBS-NEXT: bexti a1, a1, 3
+; RV64IMZBS-NEXT: srai a4, a4, 63
+; RV64IMZBS-NEXT: and a4, a4, a0
+; RV64IMZBS-NEXT: slli a0, a0, 3
+; RV64IMZBS-NEXT: neg a1, a1
+; RV64IMZBS-NEXT: and a0, a1, a0
+; RV64IMZBS-NEXT: xor a2, a4, a2
+; RV64IMZBS-NEXT: xor a0, a3, a0
+; RV64IMZBS-NEXT: xor a0, a2, a0
+; RV64IMZBS-NEXT: ret
%res = call i4 @llvm.clmul.i4(i4 %a, i4 %b)
ret i4 %res
}
@@ -189,59 +253,187 @@ define i8 @clmul_i8(i8 %a, i8 %b) nounwind {
; RV64I-NEXT: xor a0, a2, a0
; RV64I-NEXT: ret
;
-; CHECK-M-LABEL: clmul_i8:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: andi a2, a1, 2
-; CHECK-M-NEXT: andi a3, a1, 1
-; CHECK-M-NEXT: andi a4, a1, 4
-; CHECK-M-NEXT: andi a5, a1, 8
-; CHECK-M-NEXT: mul a2, a0, a2
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: xor a2, a3, a2
-; CHECK-M-NEXT: andi a3, a1, 16
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: xor a4, a4, a5
-; CHECK-M-NEXT: andi a5, a1, 32
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: xor a3, a3, a5
-; CHECK-M-NEXT: xor a2, a2, a4
-; CHECK-M-NEXT: andi a4, a1, 64
-; CHECK-M-NEXT: andi a1, a1, -128
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: xor a3, a3, a4
-; CHECK-M-NEXT: xor a2, a2, a3
-; CHECK-M-NEXT: mul a0, a0, a1
-; CHECK-M-NEXT: xor a0, a2, a0
-; CHECK-M-NEXT: ret
+; RV32IM-LABEL: clmul_i8:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: slli a2, a1, 30
+; RV32IM-NEXT: slli a3, a0, 1
+; RV32IM-NEXT: slli a4, a1, 29
+; RV32IM-NEXT: slli a5, a0, 2
+; RV32IM-NEXT: slli a6, a1, 28
+; RV32IM-NEXT: slli a7, a0, 3
+; RV32IM-NEXT: slli t0, a1, 27
+; RV32IM-NEXT: srli a2, a2, 31
+; RV32IM-NEXT: neg a2, a2
+; RV32IM-NEXT: and a2, a2, a3
+; RV32IM-NEXT: slli a3, a0, 4
+; RV32IM-NEXT: srli a4, a4, 31
+; RV32IM-NEXT: neg a4, a4
+; RV32IM-NEXT: and a4, a4, a5
+; RV32IM-NEXT: slli a5, a1, 26
+; RV32IM-NEXT: srli a6, a6, 31
+; RV32IM-NEXT: neg a6, a6
+; RV32IM-NEXT: and a6, a6, a7
+; RV32IM-NEXT: slli a7, a0, 5
+; RV32IM-NEXT: srli t0, t0, 31
+; RV32IM-NEXT: neg t0, t0
+; RV32IM-NEXT: and a3, t0, a3
+; RV32IM-NEXT: slli t0, a1, 25
+; RV32IM-NEXT: srli a5, a5, 31
+; RV32IM-NEXT: neg a5, a5
+; RV32IM-NEXT: and a5, a5, a7
+; RV32IM-NEXT: slli a7, a0, 6
+; RV32IM-NEXT: srli t0, t0, 31
+; RV32IM-NEXT: neg t0, t0
+; RV32IM-NEXT: and a7, t0, a7
+; RV32IM-NEXT: slli t0, a1, 31
+; RV32IM-NEXT: srai t0, t0, 31
+; RV32IM-NEXT: and t0, t0, a0
+; RV32IM-NEXT: xor a2, t0, a2
+; RV32IM-NEXT: xor a4, a4, a6
+; RV32IM-NEXT: xor a3, a3, a5
+; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: slli a0, a0, 7
+; RV32IM-NEXT: srli a1, a1, 31
+; RV32IM-NEXT: neg a1, a1
+; RV32IM-NEXT: xor a2, a2, a4
+; RV32IM-NEXT: xor a3, a3, a7
+; RV32IM-NEXT: xor a2, a2, a3
+; RV32IM-NEXT: and a0, a1, a0
+; RV32IM-NEXT: xor a0, a2, a0
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: clmul_i8:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: slli a2, a1, 62
+; RV64IM-NEXT: slli a3, a0, 1
+; RV64IM-NEXT: slli a4, a1, 61
+; RV64IM-NEXT: slli a5, a0, 2
+; RV64IM-NEXT: slli a6, a1, 60
+; RV64IM-NEXT: slli a7, a0, 3
+; RV64IM-NEXT: slli t0, a1, 59
+; RV64IM-NEXT: srli a2, a2, 63
+; RV64IM-NEXT: neg a2, a2
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: slli a3, a0, 4
+; RV64IM-NEXT: srli a4, a4, 63
+; RV64IM-NEXT: neg a4, a4
+; RV64IM-NEXT: and a4, a4, a5
+; RV64IM-NEXT: slli a5, a1, 58
+; RV64IM-NEXT: srli a6, a6, 63
+; RV64IM-NEXT: neg a6, a6
+; RV64IM-NEXT: and a6, a6, a7
+; RV64IM-NEXT: slli a7, a0, 5
+; RV64IM-NEXT: srli t0, t0, 63
+; RV64IM-NEXT: neg t0, t0
+; RV64IM-NEXT: and a3, t0, a3
+; RV64IM-NEXT: slli t0, a1, 57
+; RV64IM-NEXT: srli a5, a5, 63
+; RV64IM-NEXT: neg a5, a5
+; RV64IM-NEXT: and a5, a5, a7
+; RV64IM-NEXT: slli a7, a0, 6
+; RV64IM-NEXT: srli t0, t0, 63
+; RV64IM-NEXT: neg t0, t0
+; RV64IM-NEXT: and a7, t0, a7
+; RV64IM-NEXT: slli t0, a1, 63
+; RV64IM-NEXT: srai t0, t0, 63
+; RV64IM-NEXT: and t0, t0, a0
+; RV64IM-NEXT: xor a2, t0, a2
+; RV64IM-NEXT: xor a4, a4, a6
+; RV64IM-NEXT: xor a3, a3, a5
+; RV64IM-NEXT: slli a1, a1, 56
+; RV64IM-NEXT: slli a0, a0, 7
+; RV64IM-NEXT: srli a1, a1, 63
+; RV64IM-NEXT: neg a1, a1
+; RV64IM-NEXT: xor a2, a2, a4
+; RV64IM-NEXT: xor a3, a3, a7
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: and a0, a1, a0
+; RV64IM-NEXT: xor a0, a2, a0
+; RV64IM-NEXT: ret
+;
+; RV32IMZBS-LABEL: clmul_i8:
+; RV32IMZBS: # %bb.0:
+; RV32IMZBS-NEXT: bexti a2, a1, 1
+; RV32IMZBS-NEXT: slli a3, a0, 1
+; RV32IMZBS-NEXT: bexti a4, a1, 2
+; RV32IMZBS-NEXT: slli a5, a0, 2
+; RV32IMZBS-NEXT: bexti a6, a1, 3
+; RV32IMZBS-NEXT: slli a7, a0, 3
+; RV32IMZBS-NEXT: bexti t0, a1, 4
+; RV32IMZBS-NEXT: neg a2, a2
+; RV32IMZBS-NEXT: and a2, a2, a3
+; RV32IMZBS-NEXT: slli a3, a0, 4
+; RV32IMZBS-NEXT: neg a4, a4
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: bexti a5, a1, 5
+; RV32IMZBS-NEXT: neg a6, a6
+; RV32IMZBS-NEXT: and a6, a6, a7
+; RV32IMZBS-NEXT: slli a7, a0, 5
+; RV32IMZBS-NEXT: neg t0, t0
+; RV32IMZBS-NEXT: and a3, t0, a3
+; RV32IMZBS-NEXT: bexti t0, a1, 6
+; RV32IMZBS-NEXT: neg a5, a5
+; RV32IMZBS-NEXT: and a5, a5, a7
+; RV32IMZBS-NEXT: slli a7, a0, 6
+; RV32IMZBS-NEXT: neg t0, t0
+; RV32IMZBS-NEXT: and a7, t0, a7
+; RV32IMZBS-NEXT: slli t0, a1, 31
+; RV32IMZBS-NEXT: srai t0, t0, 31
+; RV32IMZBS-NEXT: and t0, t0, a0
+; RV32IMZBS-NEXT: xor a2, t0, a2
+; RV32IMZBS-NEXT: xor a4, a4, a6
+; RV32IMZBS-NEXT: xor a3, a3, a5
+; RV32IMZBS-NEXT: bexti a1, a1, 7
+; RV32IMZBS-NEXT: slli a0, a0, 7
+; RV32IMZBS-NEXT: neg a1, a1
+; RV32IMZBS-NEXT: xor a2, a2, a4
+; RV32IMZBS-NEXT: xor a3, a3, a7
+; RV32IMZBS-NEXT: xor a2, a2, a3
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: ret
;
-; CHECK-ZBS-LABEL: clmul_i8:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: andi a2, a1, 2
-; CHECK-ZBS-NEXT: andi a3, a1, 1
-; CHECK-ZBS-NEXT: andi a4, a1, 4
-; CHECK-ZBS-NEXT: andi a5, a1, 8
-; CHECK-ZBS-NEXT: mul a2, a0, a2
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: xor a2, a3, a2
-; CHECK-ZBS-NEXT: andi a3, a1, 16
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: mul a5, a0, a5
-; CHECK-ZBS-NEXT: xor a4, a4, a5
-; CHECK-ZBS-NEXT: andi a5, a1, 32
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: mul a5, a0, a5
-; CHECK-ZBS-NEXT: xor a3, a3, a5
-; CHECK-ZBS-NEXT: xor a2, a2, a4
-; CHECK-ZBS-NEXT: andi a4, a1, 64
-; CHECK-ZBS-NEXT: andi a1, a1, -128
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: xor a3, a3, a4
-; CHECK-ZBS-NEXT: xor a2, a2, a3
-; CHECK-ZBS-NEXT: mul a0, a0, a1
-; CHECK-ZBS-NEXT: xor a0, a2, a0
-; CHECK-ZBS-NEXT: ret
+; RV64IMZBS-LABEL: clmul_i8:
+; RV64IMZBS: # %bb.0:
+; RV64IMZBS-NEXT: bexti a2, a1, 1
+; RV64IMZBS-NEXT: slli a3, a0, 1
+; RV64IMZBS-NEXT: bexti a4, a1, 2
+; RV64IMZBS-NEXT: slli a5, a0, 2
+; RV64IMZBS-NEXT: bexti a6, a1, 3
+; RV64IMZBS-NEXT: slli a7, a0, 3
+; RV64IMZBS-NEXT: bexti t0, a1, 4
+; RV64IMZBS-NEXT: neg a2, a2
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: slli a3, a0, 4
+; RV64IMZBS-NEXT: neg a4, a4
+; RV64IMZBS-NEXT: and a4, a4, a5
+; RV64IMZBS-NEXT: bexti a5, a1, 5
+; RV64IMZBS-NEXT: neg a6, a6
+; RV64IMZBS-NEXT: and a6, a6, a7
+; RV64IMZBS-NEXT: slli a7, a0, 5
+; RV64IMZBS-NEXT: neg t0, t0
+; RV64IMZBS-NEXT: and a3, t0, a3
+; RV64IMZBS-NEXT: bexti t0, a1, 6
+; RV64IMZBS-NEXT: neg a5, a5
+; RV64IMZBS-NEXT: and a5, a5, a7
+; RV64IMZBS-NEXT: slli a7, a0, 6
+; RV64IMZBS-NEXT: neg t0, t0
+; RV64IMZBS-NEXT: and a7, t0, a7
+; RV64IMZBS-NEXT: slli t0, a1, 63
+; RV64IMZBS-NEXT: srai t0, t0, 63
+; RV64IMZBS-NEXT: and t0, t0, a0
+; RV64IMZBS-NEXT: xor a2, t0, a2
+; RV64IMZBS-NEXT: xor a4, a4, a6
+; RV64IMZBS-NEXT: xor a3, a3, a5
+; RV64IMZBS-NEXT: bexti a1, a1, 7
+; RV64IMZBS-NEXT: slli a0, a0, 7
+; RV64IMZBS-NEXT: neg a1, a1
+; RV64IMZBS-NEXT: xor a2, a2, a4
+; RV64IMZBS-NEXT: xor a3, a3, a7
+; RV64IMZBS-NEXT: xor a2, a2, a3
+; RV64IMZBS-NEXT: and a0, a1, a0
+; RV64IMZBS-NEXT: xor a0, a2, a0
+; RV64IMZBS-NEXT: ret
%res = call i8 @llvm.clmul.i8(i8 %a, i8 %b)
ret i8 %res
}
@@ -465,118 +657,385 @@ define i16 @clmul_i16(i16 %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
-; CHECK-M-LABEL: clmul_i16:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: andi a2, a1, 2
-; CHECK-M-NEXT: andi a3, a1, 1
-; CHECK-M-NEXT: andi a4, a1, 4
-; CHECK-M-NEXT: andi a5, a1, 8
-; CHECK-M-NEXT: andi a6, a1, 16
-; CHECK-M-NEXT: andi a7, a1, 32
-; CHECK-M-NEXT: andi t0, a1, 64
-; CHECK-M-NEXT: andi t1, a1, 128
-; CHECK-M-NEXT: mul a2, a0, a2
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: xor a2, a3, a2
-; CHECK-M-NEXT: andi a3, a1, 256
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: xor a4, a4, a5
-; CHECK-M-NEXT: andi a5, a1, 512
-; CHECK-M-NEXT: mul a6, a0, a6
-; CHECK-M-NEXT: mul a7, a0, a7
-; CHECK-M-NEXT: xor a6, a6, a7
-; CHECK-M-NEXT: li a7, 1
-; CHECK-M-NEXT: mul t1, a0, t1
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: xor a3, t1, a3
-; CHECK-M-NEXT: lui t1, 1
-; CHECK-M-NEXT: xor a2, a2, a4
-; CHECK-M-NEXT: lui a4, 2
-; CHECK-M-NEXT: mul t0, a0, t0
-; CHECK-M-NEXT: xor a6, a6, t0
-; CHECK-M-NEXT: lui t0, 4
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: xor a3, a3, a5
-; CHECK-M-NEXT: lui a5, 1048568
-; CHECK-M-NEXT: slli a7, a7, 11
-; CHECK-M-NEXT: and t1, a1, t1
-; CHECK-M-NEXT: and a4, a1, a4
-; CHECK-M-NEXT: and t0, a1, t0
-; CHECK-M-NEXT: and a5, a1, a5
-; CHECK-M-NEXT: and a7, a1, a7
-; CHECK-M-NEXT: andi a1, a1, 1024
-; CHECK-M-NEXT: mul a1, a0, a1
-; CHECK-M-NEXT: mul t1, a0, t1
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: mul t0, a0, t0
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: mul a0, a0, a7
-; CHECK-M-NEXT: xor a2, a2, a6
-; CHECK-M-NEXT: xor a1, a3, a1
-; CHECK-M-NEXT: xor a0, a0, t1
-; CHECK-M-NEXT: xor a1, a2, a1
-; CHECK-M-NEXT: xor a0, a0, a4
-; CHECK-M-NEXT: xor a0, a1, a0
-; CHECK-M-NEXT: xor a1, t0, a5
-; CHECK-M-NEXT: xor a0, a0, a1
-; CHECK-M-NEXT: ret
+; RV32IM-LABEL: clmul_i16:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: addi sp, sp, -16
+; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s3, 0(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, a1, 30
+; RV32IM-NEXT: slli a3, a0, 1
+; RV32IM-NEXT: slli a4, a1, 29
+; RV32IM-NEXT: slli a5, a0, 2
+; RV32IM-NEXT: slli a6, a1, 28
+; RV32IM-NEXT: slli a7, a0, 3
+; RV32IM-NEXT: slli t0, a1, 27
+; RV32IM-NEXT: slli t1, a0, 4
+; RV32IM-NEXT: slli t2, a1, 26
+; RV32IM-NEXT: slli t3, a0, 5
+; RV32IM-NEXT: slli t4, a1, 25
+; RV32IM-NEXT: slli t5, a0, 6
+; RV32IM-NEXT: slli t6, a1, 24
+; RV32IM-NEXT: slli s0, a0, 7
+; RV32IM-NEXT: slli s1, a1, 23
+; RV32IM-NEXT: srli a2, a2, 31
+; RV32IM-NEXT: neg a2, a2
+; RV32IM-NEXT: and a2, a2, a3
+; RV32IM-NEXT: slli s2, a0, 8
+; RV32IM-NEXT: srli a4, a4, 31
+; RV32IM-NEXT: neg a3, a4
+; RV32IM-NEXT: and a3, a3, a5
+; RV32IM-NEXT: slli s3, a1, 22
+; RV32IM-NEXT: srli a4, a6, 31
+; RV32IM-NEXT: neg a4, a4
+; RV32IM-NEXT: and a4, a4, a7
+; RV32IM-NEXT: slli a6, a0, 9
+; RV32IM-NEXT: srli a5, t0, 31
+; RV32IM-NEXT: neg a5, a5
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: slli a7, a1, 21
+; RV32IM-NEXT: srli t0, t2, 31
+; RV32IM-NEXT: neg t0, t0
+; RV32IM-NEXT: and t0, t0, t3
+; RV32IM-NEXT: slli t1, a0, 10
+; RV32IM-NEXT: srli t2, t4, 31
+; RV32IM-NEXT: neg t2, t2
+; RV32IM-NEXT: and t2, t2, t5
+; RV32IM-NEXT: li t3, 1
+; RV32IM-NEXT: srli t4, t6, 31
+; RV32IM-NEXT: neg t4, t4
+; RV32IM-NEXT: and t4, t4, s0
+; RV32IM-NEXT: lui t5, 1
+; RV32IM-NEXT: srli s1, s1, 31
+; RV32IM-NEXT: neg t6, s1
+; RV32IM-NEXT: and t6, t6, s2
+; RV32IM-NEXT: lui s0, 2
+; RV32IM-NEXT: srli s1, s3, 31
+; RV32IM-NEXT: neg s1, s1
+; RV32IM-NEXT: and a6, s1, a6
+; RV32IM-NEXT: lui s1, 4
+; RV32IM-NEXT: srli a7, a7, 31
+; RV32IM-NEXT: neg a7, a7
+; RV32IM-NEXT: and a7, a7, t1
+; RV32IM-NEXT: lui t1, 1048568
+; RV32IM-NEXT: slli t3, t3, 11
+; RV32IM-NEXT: and t5, a1, t5
+; RV32IM-NEXT: and s0, a1, s0
+; RV32IM-NEXT: and s1, a1, s1
+; RV32IM-NEXT: and t1, a1, t1
+; RV32IM-NEXT: and t3, a1, t3
+; RV32IM-NEXT: slli a1, a1, 31
+; RV32IM-NEXT: srai a1, a1, 31
+; RV32IM-NEXT: and a1, a1, a0
+; RV32IM-NEXT: mul t5, a0, t5
+; RV32IM-NEXT: mul s0, a0, s0
+; RV32IM-NEXT: mul s1, a0, s1
+; RV32IM-NEXT: mul t1, a0, t1
+; RV32IM-NEXT: mul a0, a0, t3
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: xor a2, a5, t0
+; RV32IM-NEXT: xor a4, t4, t6
+; RV32IM-NEXT: xor a0, a0, t5
+; RV32IM-NEXT: xor a1, a1, a3
+; RV32IM-NEXT: xor a2, a2, t2
+; RV32IM-NEXT: xor a3, a4, a6
+; RV32IM-NEXT: xor a0, a0, s0
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: xor a2, a3, a7
+; RV32IM-NEXT: xor a0, a0, s1
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: xor a0, a0, t1
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s3, 0(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 16
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: clmul_i16:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: addi sp, sp, -32
+; RV64IM-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s3, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a2, a1, 62
+; RV64IM-NEXT: slli a3, a0, 1
+; RV64IM-NEXT: slli a4, a1, 61
+; RV64IM-NEXT: slli a5, a0, 2
+; RV64IM-NEXT: slli a6, a1, 60
+; RV64IM-NEXT: slli a7, a0, 3
+; RV64IM-NEXT: slli t0, a1, 59
+; RV64IM-NEXT: slli t1, a0, 4
+; RV64IM-NEXT: slli t2, a1, 58
+; RV64IM-NEXT: slli t3, a0, 5
+; RV64IM-NEXT: slli t4, a1, 57
+; RV64IM-NEXT: slli t5, a0, 6
+; RV64IM-NEXT: slli t6, a1, 56
+; RV64IM-NEXT: slli s0, a0, 7
+; RV64IM-NEXT: slli s1, a1, 55
+; RV64IM-NEXT: srli a2, a2, 63
+; RV64IM-NEXT: neg a2, a2
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: slli s2, a0, 8
+; RV64IM-NEXT: srli a4, a4, 63
+; RV64IM-NEXT: neg a3, a4
+; RV64IM-NEXT: and a3, a3, a5
+; RV64IM-NEXT: slli s3, a1, 54
+; RV64IM-NEXT: srli a4, a6, 63
+; RV64IM-NEXT: neg a4, a4
+; RV64IM-NEXT: and a4, a4, a7
+; RV64IM-NEXT: slli a6, a0, 9
+; RV64IM-NEXT: srli a5, t0, 63
+; RV64IM-NEXT: neg a5, a5
+; RV64IM-NEXT: and a5, a5, t1
+; RV64IM-NEXT: slli a7, a1, 53
+; RV64IM-NEXT: srli t0, t2, 63
+; RV64IM-NEXT: neg t0, t0
+; RV64IM-NEXT: and t0, t0, t3
+; RV64IM-NEXT: slli t1, a0, 10
+; RV64IM-NEXT: srli t2, t4, 63
+; RV64IM-NEXT: neg t2, t2
+; RV64IM-NEXT: and t2, t2, t5
+; RV64IM-NEXT: li t3, 1
+; RV64IM-NEXT: srli t4, t6, 63
+; RV64IM-NEXT: neg t4, t4
+; RV64IM-NEXT: and t4, t4, s0
+; RV64IM-NEXT: lui t5, 1
+; RV64IM-NEXT: srli s1, s1, 63
+; RV64IM-NEXT: neg t6, s1
+; RV64IM-NEXT: and t6, t6, s2
+; RV64IM-NEXT: lui s0, 2
+; RV64IM-NEXT: srli s1, s3, 63
+; RV64IM-NEXT: neg s1, s1
+; RV64IM-NEXT: and a6, s1, a6
+; RV64IM-NEXT: lui s1, 4
+; RV64IM-NEXT: srli a7, a7, 63
+; RV64IM-NEXT: neg a7, a7
+; RV64IM-NEXT: and a7, a7, t1
+; RV64IM-NEXT: lui t1, 1048568
+; RV64IM-NEXT: slli t3, t3, 11
+; RV64IM-NEXT: and t5, a1, t5
+; RV64IM-NEXT: and s0, a1, s0
+; RV64IM-NEXT: and s1, a1, s1
+; RV64IM-NEXT: and t1, a1, t1
+; RV64IM-NEXT: and t3, a1, t3
+; RV64IM-NEXT: slli a1, a1, 63
+; RV64IM-NEXT: srai a1, a1, 63
+; RV64IM-NEXT: and a1, a1, a0
+; RV64IM-NEXT: mul t5, a0, t5
+; RV64IM-NEXT: mul s0, a0, s0
+; RV64IM-NEXT: mul s1, a0, s1
+; RV64IM-NEXT: mul t1, a0, t1
+; RV64IM-NEXT: mul a0, a0, t3
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: xor a3, a3, a4
+; RV64IM-NEXT: xor a2, a5, t0
+; RV64IM-NEXT: xor a4, t4, t6
+; RV64IM-NEXT: xor a0, a0, t5
+; RV64IM-NEXT: xor a1, a1, a3
+; RV64IM-NEXT: xor a2, a2, t2
+; RV64IM-NEXT: xor a3, a4, a6
+; RV64IM-NEXT: xor a0, a0, s0
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: xor a2, a3, a7
+; RV64IM-NEXT: xor a0, a0, s1
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: xor a0, a0, t1
+; RV64IM-NEXT: xor a0, a1, a0
+; RV64IM-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s3, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 32
+; RV64IM-NEXT: ret
+;
+; RV32IMZBS-LABEL: clmul_i16:
+; RV32IMZBS: # %bb.0:
+; RV32IMZBS-NEXT: addi sp, sp, -16
+; RV32IMZBS-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s3, 0(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 1
+; RV32IMZBS-NEXT: slli a3, a0, 1
+; RV32IMZBS-NEXT: bexti a4, a1, 2
+; RV32IMZBS-NEXT: slli a5, a0, 2
+; RV32IMZBS-NEXT: bexti a6, a1, 3
+; RV32IMZBS-NEXT: slli a7, a0, 3
+; RV32IMZBS-NEXT: bexti t0, a1, 4
+; RV32IMZBS-NEXT: slli t1, a0, 4
+; RV32IMZBS-NEXT: bexti t2, a1, 5
+; RV32IMZBS-NEXT: slli t3, a0, 5
+; RV32IMZBS-NEXT: bexti t4, a1, 6
+; RV32IMZBS-NEXT: slli t5, a0, 6
+; RV32IMZBS-NEXT: bexti t6, a1, 7
+; RV32IMZBS-NEXT: slli s0, a0, 7
+; RV32IMZBS-NEXT: bexti s1, a1, 8
+; RV32IMZBS-NEXT: neg a2, a2
+; RV32IMZBS-NEXT: and a2, a2, a3
+; RV32IMZBS-NEXT: slli s2, a0, 8
+; RV32IMZBS-NEXT: neg a3, a4
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: bexti s3, a1, 9
+; RV32IMZBS-NEXT: neg a4, a6
+; RV32IMZBS-NEXT: and a4, a4, a7
+; RV32IMZBS-NEXT: slli a6, a0, 9
+; RV32IMZBS-NEXT: neg a5, t0
+; RV32IMZBS-NEXT: and a5, a5, t1
+; RV32IMZBS-NEXT: bexti a7, a1, 10
+; RV32IMZBS-NEXT: neg t0, t2
+; RV32IMZBS-NEXT: and t0, t0, t3
+; RV32IMZBS-NEXT: slli t1, a0, 10
+; RV32IMZBS-NEXT: neg t2, t4
+; RV32IMZBS-NEXT: and t2, t2, t5
+; RV32IMZBS-NEXT: bexti t3, a1, 11
+; RV32IMZBS-NEXT: neg t4, t6
+; RV32IMZBS-NEXT: and t4, t4, s0
+; RV32IMZBS-NEXT: slli t5, a0, 11
+; RV32IMZBS-NEXT: neg t6, s1
+; RV32IMZBS-NEXT: and t6, t6, s2
+; RV32IMZBS-NEXT: bexti s0, a1, 12
+; RV32IMZBS-NEXT: neg s1, s3
+; RV32IMZBS-NEXT: and a6, s1, a6
+; RV32IMZBS-NEXT: slli s1, a0, 12
+; RV32IMZBS-NEXT: neg a7, a7
+; RV32IMZBS-NEXT: and a7, a7, t1
+; RV32IMZBS-NEXT: bexti t1, a1, 13
+; RV32IMZBS-NEXT: neg t3, t3
+; RV32IMZBS-NEXT: and t3, t3, t5
+; RV32IMZBS-NEXT: slli t5, a0, 13
+; RV32IMZBS-NEXT: neg s0, s0
+; RV32IMZBS-NEXT: and s0, s0, s1
+; RV32IMZBS-NEXT: bexti s1, a1, 14
+; RV32IMZBS-NEXT: neg t1, t1
+; RV32IMZBS-NEXT: and t1, t1, t5
+; RV32IMZBS-NEXT: slli t5, a0, 14
+; RV32IMZBS-NEXT: neg s1, s1
+; RV32IMZBS-NEXT: and t5, s1, t5
+; RV32IMZBS-NEXT: slli s1, a1, 31
+; RV32IMZBS-NEXT: bexti a1, a1, 15
+; RV32IMZBS-NEXT: srai s1, s1, 31
+; RV32IMZBS-NEXT: and s1, s1, a0
+; RV32IMZBS-NEXT: slli a0, a0, 15
+; RV32IMZBS-NEXT: neg a1, a1
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: xor a2, s1, a2
+; RV32IMZBS-NEXT: xor a3, a3, a4
+; RV32IMZBS-NEXT: xor a1, a5, t0
+; RV32IMZBS-NEXT: xor a4, t4, t6
+; RV32IMZBS-NEXT: xor a5, t3, s0
+; RV32IMZBS-NEXT: xor a2, a2, a3
+; RV32IMZBS-NEXT: xor a1, a1, t2
+; RV32IMZBS-NEXT: xor a3, a4, a6
+; RV32IMZBS-NEXT: xor a4, a5, t1
+; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: xor a2, a3, a7
+; RV32IMZBS-NEXT: xor a3, a4, t5
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: xor a0, a3, a0
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s3, 0(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: addi sp, sp, 16
+; RV32IMZBS-NEXT: ret
;
-; CHECK-ZBS-LABEL: clmul_i16:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: andi a2, a1, 2
-; CHECK-ZBS-NEXT: andi a3, a1, 1
-; CHECK-ZBS-NEXT: andi a4, a1, 4
-; CHECK-ZBS-NEXT: andi a5, a1, 8
-; CHECK-ZBS-NEXT: andi a6, a1, 16
-; CHECK-ZBS-NEXT: andi a7, a1, 32
-; CHECK-ZBS-NEXT: mul a2, a0, a2
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: xor a2, a3, a2
-; CHECK-ZBS-NEXT: andi a3, a1, 64
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: mul a5, a0, a5
-; CHECK-ZBS-NEXT: xor a4, a4, a5
-; CHECK-ZBS-NEXT: andi a5, a1, 128
-; CHECK-ZBS-NEXT: mul a6, a0, a6
-; CHECK-ZBS-NEXT: mul a7, a0, a7
-; CHECK-ZBS-NEXT: xor a6, a6, a7
-; CHECK-ZBS-NEXT: andi a7, a1, 256
-; CHECK-ZBS-NEXT: mul a5, a0, a5
-; CHECK-ZBS-NEXT: mul a7, a0, a7
-; CHECK-ZBS-NEXT: xor a5, a5, a7
-; CHECK-ZBS-NEXT: andi a7, a1, 512
-; CHECK-ZBS-NEXT: xor a2, a2, a4
-; CHECK-ZBS-NEXT: bseti a4, zero, 11
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: xor a3, a6, a3
-; CHECK-ZBS-NEXT: lui a6, 1
-; CHECK-ZBS-NEXT: mul a7, a0, a7
-; CHECK-ZBS-NEXT: xor a5, a5, a7
-; CHECK-ZBS-NEXT: lui a7, 2
-; CHECK-ZBS-NEXT: and a4, a1, a4
-; CHECK-ZBS-NEXT: and a6, a1, a6
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: mul a6, a0, a6
-; CHECK-ZBS-NEXT: xor a4, a4, a6
-; CHECK-ZBS-NEXT: lui a6, 4
-; CHECK-ZBS-NEXT: xor a2, a2, a3
-; CHECK-ZBS-NEXT: lui a3, 1048568
-; CHECK-ZBS-NEXT: and a7, a1, a7
-; CHECK-ZBS-NEXT: and a6, a1, a6
-; CHECK-ZBS-NEXT: and a3, a1, a3
-; CHECK-ZBS-NEXT: andi a1, a1, 1024
-; CHECK-ZBS-NEXT: mul a1, a0, a1
-; CHECK-ZBS-NEXT: xor a1, a5, a1
-; CHECK-ZBS-NEXT: mul a5, a0, a7
-; CHECK-ZBS-NEXT: xor a4, a4, a5
-; CHECK-ZBS-NEXT: xor a1, a2, a1
-; CHECK-ZBS-NEXT: mul a2, a0, a6
-; CHECK-ZBS-NEXT: xor a2, a4, a2
-; CHECK-ZBS-NEXT: xor a1, a1, a2
-; CHECK-ZBS-NEXT: mul a0, a0, a3
-; CHECK-ZBS-NEXT: xor a0, a1, a0
-; CHECK-ZBS-NEXT: ret
+; RV64IMZBS-LABEL: clmul_i16:
+; RV64IMZBS: # %bb.0:
+; RV64IMZBS-NEXT: addi sp, sp, -32
+; RV64IMZBS-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s3, 0(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a1, 1
+; RV64IMZBS-NEXT: slli a3, a0, 1
+; RV64IMZBS-NEXT: bexti a4, a1, 2
+; RV64IMZBS-NEXT: slli a5, a0, 2
+; RV64IMZBS-NEXT: bexti a6, a1, 3
+; RV64IMZBS-NEXT: slli a7, a0, 3
+; RV64IMZBS-NEXT: bexti t0, a1, 4
+; RV64IMZBS-NEXT: slli t1, a0, 4
+; RV64IMZBS-NEXT: bexti t2, a1, 5
+; RV64IMZBS-NEXT: slli t3, a0, 5
+; RV64IMZBS-NEXT: bexti t4, a1, 6
+; RV64IMZBS-NEXT: slli t5, a0, 6
+; RV64IMZBS-NEXT: bexti t6, a1, 7
+; RV64IMZBS-NEXT: slli s0, a0, 7
+; RV64IMZBS-NEXT: bexti s1, a1, 8
+; RV64IMZBS-NEXT: neg a2, a2
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: slli s2, a0, 8
+; RV64IMZBS-NEXT: neg a3, a4
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: bexti s3, a1, 9
+; RV64IMZBS-NEXT: neg a4, a6
+; RV64IMZBS-NEXT: and a4, a4, a7
+; RV64IMZBS-NEXT: slli a6, a0, 9
+; RV64IMZBS-NEXT: neg a5, t0
+; RV64IMZBS-NEXT: and a5, a5, t1
+; RV64IMZBS-NEXT: bexti a7, a1, 10
+; RV64IMZBS-NEXT: neg t0, t2
+; RV64IMZBS-NEXT: and t0, t0, t3
+; RV64IMZBS-NEXT: slli t1, a0, 10
+; RV64IMZBS-NEXT: neg t2, t4
+; RV64IMZBS-NEXT: and t2, t2, t5
+; RV64IMZBS-NEXT: bexti t3, a1, 11
+; RV64IMZBS-NEXT: neg t4, t6
+; RV64IMZBS-NEXT: and t4, t4, s0
+; RV64IMZBS-NEXT: slli t5, a0, 11
+; RV64IMZBS-NEXT: neg t6, s1
+; RV64IMZBS-NEXT: and t6, t6, s2
+; RV64IMZBS-NEXT: bexti s0, a1, 12
+; RV64IMZBS-NEXT: neg s1, s3
+; RV64IMZBS-NEXT: and a6, s1, a6
+; RV64IMZBS-NEXT: slli s1, a0, 12
+; RV64IMZBS-NEXT: neg a7, a7
+; RV64IMZBS-NEXT: and a7, a7, t1
+; RV64IMZBS-NEXT: bexti t1, a1, 13
+; RV64IMZBS-NEXT: neg t3, t3
+; RV64IMZBS-NEXT: and t3, t3, t5
+; RV64IMZBS-NEXT: slli t5, a0, 13
+; RV64IMZBS-NEXT: neg s0, s0
+; RV64IMZBS-NEXT: and s0, s0, s1
+; RV64IMZBS-NEXT: bexti s1, a1, 14
+; RV64IMZBS-NEXT: neg t1, t1
+; RV64IMZBS-NEXT: and t1, t1, t5
+; RV64IMZBS-NEXT: slli t5, a0, 14
+; RV64IMZBS-NEXT: neg s1, s1
+; RV64IMZBS-NEXT: and t5, s1, t5
+; RV64IMZBS-NEXT: slli s1, a1, 63
+; RV64IMZBS-NEXT: bexti a1, a1, 15
+; RV64IMZBS-NEXT: srai s1, s1, 63
+; RV64IMZBS-NEXT: and s1, s1, a0
+; RV64IMZBS-NEXT: slli a0, a0, 15
+; RV64IMZBS-NEXT: neg a1, a1
+; RV64IMZBS-NEXT: and a0, a1, a0
+; RV64IMZBS-NEXT: xor a2, s1, a2
+; RV64IMZBS-NEXT: xor a3, a3, a4
+; RV64IMZBS-NEXT: xor a1, a5, t0
+; RV64IMZBS-NEXT: xor a4, t4, t6
+; RV64IMZBS-NEXT: xor a5, t3, s0
+; RV64IMZBS-NEXT: xor a2, a2, a3
+; RV64IMZBS-NEXT: xor a1, a1, t2
+; RV64IMZBS-NEXT: xor a3, a4, a6
+; RV64IMZBS-NEXT: xor a4, a5, t1
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: xor a2, a3, a7
+; RV64IMZBS-NEXT: xor a3, a4, t5
+; RV64IMZBS-NEXT: xor a1, a1, a2
+; RV64IMZBS-NEXT: xor a0, a3, a0
+; RV64IMZBS-NEXT: xor a0, a1, a0
+; RV64IMZBS-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 0(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 32
+; RV64IMZBS-NEXT: ret
%res = call i16 @llvm.clmul.i16(i16 %a, i16 %b)
ret i16 %res
}
@@ -1080,596 +1539,812 @@ define i32 @clmul_i32(i32 %a, i32 %b) nounwind {
;
; RV32IM-LABEL: clmul_i32:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -48
-; RV32IM-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s9, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s10, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t6, a1, 2
-; RV32IM-NEXT: andi s1, a1, 1
-; RV32IM-NEXT: andi a7, a1, 4
-; RV32IM-NEXT: andi t2, a1, 8
-; RV32IM-NEXT: andi t0, a1, 16
-; RV32IM-NEXT: andi t3, a1, 32
-; RV32IM-NEXT: andi a2, a1, 64
-; RV32IM-NEXT: andi t4, a1, 128
-; RV32IM-NEXT: andi s0, a1, 256
-; RV32IM-NEXT: andi a3, a1, 512
-; RV32IM-NEXT: li a4, 1
-; RV32IM-NEXT: lui a5, 1
-; RV32IM-NEXT: lui a6, 2
-; RV32IM-NEXT: lui t1, 4
-; RV32IM-NEXT: lui t5, 8
-; RV32IM-NEXT: lui s2, 16
-; RV32IM-NEXT: lui s3, 32
-; RV32IM-NEXT: lui s4, 64
-; RV32IM-NEXT: lui s5, 128
-; RV32IM-NEXT: lui s6, 256
+; RV32IM-NEXT: addi sp, sp, -80
+; RV32IM-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s3, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s4, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s5, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s6, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s7, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s8, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s9, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, a0, 1
+; RV32IM-NEXT: andi a3, a1, 2
+; RV32IM-NEXT: slli s8, a0, 2
+; RV32IM-NEXT: andi s11, a1, 4
+; RV32IM-NEXT: slli a6, a0, 3
+; RV32IM-NEXT: andi s9, a1, 8
+; RV32IM-NEXT: slli t1, a0, 4
+; RV32IM-NEXT: andi s10, a1, 16
+; RV32IM-NEXT: slli t5, a0, 5
+; RV32IM-NEXT: andi s7, a1, 32
+; RV32IM-NEXT: slli t3, a0, 6
+; RV32IM-NEXT: andi s5, a1, 64
+; RV32IM-NEXT: slli s2, a0, 7
+; RV32IM-NEXT: andi s6, a1, 128
+; RV32IM-NEXT: slli t6, a0, 8
+; RV32IM-NEXT: andi s3, a1, 256
+; RV32IM-NEXT: slli s0, a0, 9
+; RV32IM-NEXT: andi s4, a1, 512
+; RV32IM-NEXT: slli t4, a0, 10
+; RV32IM-NEXT: andi s1, a1, 1024
+; RV32IM-NEXT: li a5, 1
+; RV32IM-NEXT: lui t0, 1
+; RV32IM-NEXT: lui a7, 4
+; RV32IM-NEXT: lui t2, 8
+; RV32IM-NEXT: lui ra, 16
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and a2, a3, a2
+; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a4, 32
+; RV32IM-NEXT: seqz a3, s11
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and a2, a3, s8
+; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s8, 64
+; RV32IM-NEXT: seqz s9, s9
+; RV32IM-NEXT: addi s9, s9, -1
+; RV32IM-NEXT: and a2, s9, a6
+; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s9, 128
+; RV32IM-NEXT: seqz s10, s10
+; RV32IM-NEXT: addi s10, s10, -1
+; RV32IM-NEXT: and a2, s10, t1
+; RV32IM-NEXT: sw a2, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s10, 256
+; RV32IM-NEXT: seqz s7, s7
+; RV32IM-NEXT: addi s7, s7, -1
+; RV32IM-NEXT: and t5, s7, t5
; RV32IM-NEXT: lui s7, 512
-; RV32IM-NEXT: lui s8, 1024
-; RV32IM-NEXT: lui s9, 2048
-; RV32IM-NEXT: lui s10, 4096
-; RV32IM-NEXT: mul t6, a0, t6
-; RV32IM-NEXT: mul s1, a0, s1
-; RV32IM-NEXT: xor t6, s1, t6
-; RV32IM-NEXT: lui s1, 8192
-; RV32IM-NEXT: mul a7, a0, a7
-; RV32IM-NEXT: mul t2, a0, t2
-; RV32IM-NEXT: xor a7, a7, t2
-; RV32IM-NEXT: lui t2, 16384
-; RV32IM-NEXT: mul t0, a0, t0
-; RV32IM-NEXT: mul t3, a0, t3
-; RV32IM-NEXT: xor t0, t0, t3
-; RV32IM-NEXT: lui t3, 32768
-; RV32IM-NEXT: mul t4, a0, t4
-; RV32IM-NEXT: mul s0, a0, s0
-; RV32IM-NEXT: xor t4, t4, s0
-; RV32IM-NEXT: lui s0, 65536
-; RV32IM-NEXT: xor a7, t6, a7
-; RV32IM-NEXT: lui t6, 131072
-; RV32IM-NEXT: mul a2, a0, a2
-; RV32IM-NEXT: xor a2, t0, a2
-; RV32IM-NEXT: lui t0, 262144
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: xor a3, t4, a3
-; RV32IM-NEXT: lui t4, 524288
-; RV32IM-NEXT: slli a4, a4, 11
+; RV32IM-NEXT: seqz s5, s5
+; RV32IM-NEXT: addi s5, s5, -1
+; RV32IM-NEXT: and a2, s5, t3
+; RV32IM-NEXT: sw a2, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s5, 1024
+; RV32IM-NEXT: seqz s6, s6
+; RV32IM-NEXT: addi s6, s6, -1
+; RV32IM-NEXT: and s2, s6, s2
+; RV32IM-NEXT: lui s6, 2048
+; RV32IM-NEXT: seqz s3, s3
+; RV32IM-NEXT: addi s3, s3, -1
+; RV32IM-NEXT: and s3, s3, t6
+; RV32IM-NEXT: lui s11, 4096
+; RV32IM-NEXT: seqz t6, s4
+; RV32IM-NEXT: addi t6, t6, -1
+; RV32IM-NEXT: and t6, t6, s0
+; RV32IM-NEXT: lui s0, 8192
+; RV32IM-NEXT: seqz s1, s1
+; RV32IM-NEXT: addi s1, s1, -1
+; RV32IM-NEXT: and t4, s1, t4
+; RV32IM-NEXT: lui s1, 16384
+; RV32IM-NEXT: and s4, a1, ra
+; RV32IM-NEXT: and a4, a1, a4
+; RV32IM-NEXT: mul s4, a0, s4
+; RV32IM-NEXT: mul a4, a0, a4
+; RV32IM-NEXT: xor s4, s4, a4
+; RV32IM-NEXT: lui ra, 32768
+; RV32IM-NEXT: and a4, a1, s6
+; RV32IM-NEXT: and s6, a1, s11
+; RV32IM-NEXT: mul a4, a0, a4
+; RV32IM-NEXT: mul s6, a0, s6
+; RV32IM-NEXT: xor s6, a4, s6
+; RV32IM-NEXT: lui s11, 65536
+; RV32IM-NEXT: slli a2, a5, 11
+; RV32IM-NEXT: and a4, a1, t0
+; RV32IM-NEXT: lui a5, 2
; RV32IM-NEXT: and a5, a1, a5
-; RV32IM-NEXT: and a6, a1, a6
-; RV32IM-NEXT: and t1, a1, t1
-; RV32IM-NEXT: and t5, a1, t5
-; RV32IM-NEXT: and s2, a1, s2
-; RV32IM-NEXT: and s3, a1, s3
-; RV32IM-NEXT: and s4, a1, s4
-; RV32IM-NEXT: and s5, a1, s5
-; RV32IM-NEXT: and s6, a1, s6
-; RV32IM-NEXT: and s7, a1, s7
+; RV32IM-NEXT: and a6, a1, a7
+; RV32IM-NEXT: and t1, a1, t2
; RV32IM-NEXT: and s8, a1, s8
; RV32IM-NEXT: and s9, a1, s9
; RV32IM-NEXT: and s10, a1, s10
-; RV32IM-NEXT: and s1, a1, s1
-; RV32IM-NEXT: and t2, a1, t2
-; RV32IM-NEXT: and t3, a1, t3
+; RV32IM-NEXT: and s7, a1, s7
+; RV32IM-NEXT: and t2, a1, s5
; RV32IM-NEXT: and s0, a1, s0
-; RV32IM-NEXT: and t6, a1, t6
+; RV32IM-NEXT: and s1, a1, s1
+; RV32IM-NEXT: and s5, a1, ra
+; RV32IM-NEXT: and s11, a1, s11
+; RV32IM-NEXT: lui ra, 131072
+; RV32IM-NEXT: and ra, a1, ra
+; RV32IM-NEXT: lui t0, 262144
; RV32IM-NEXT: and t0, a1, t0
-; RV32IM-NEXT: and t4, a1, t4
-; RV32IM-NEXT: and a4, a1, a4
-; RV32IM-NEXT: andi a1, a1, 1024
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: mul a6, a0, a6
-; RV32IM-NEXT: mul t1, a0, t1
-; RV32IM-NEXT: mul t5, a0, t5
-; RV32IM-NEXT: mul s2, a0, s2
-; RV32IM-NEXT: mul s3, a0, s3
-; RV32IM-NEXT: mul s4, a0, s4
-; RV32IM-NEXT: mul s5, a0, s5
-; RV32IM-NEXT: mul s6, a0, s6
-; RV32IM-NEXT: mul s7, a0, s7
-; RV32IM-NEXT: mul s8, a0, s8
-; RV32IM-NEXT: mul s9, a0, s9
-; RV32IM-NEXT: mul s10, a0, s10
-; RV32IM-NEXT: mul s1, a0, s1
-; RV32IM-NEXT: mul t2, a0, t2
-; RV32IM-NEXT: mul t3, a0, t3
+; RV32IM-NEXT: lui a7, 524288
+; RV32IM-NEXT: and a7, a1, a7
+; RV32IM-NEXT: and a2, a1, a2
+; RV32IM-NEXT: andi a1, a1, 1
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: and a1, a1, a0
+; RV32IM-NEXT: lw a3, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a1, a3
+; RV32IM-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw t3, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, t3
+; RV32IM-NEXT: lw t3, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, t3, t5
+; RV32IM-NEXT: xor s2, s2, s3
+; RV32IM-NEXT: mul a4, a0, a4
+; RV32IM-NEXT: mul a2, a0, a2
+; RV32IM-NEXT: xor a2, a2, a4
+; RV32IM-NEXT: mul a4, a0, s8
+; RV32IM-NEXT: xor a4, s4, a4
; RV32IM-NEXT: mul s0, a0, s0
-; RV32IM-NEXT: mul t6, a0, t6
-; RV32IM-NEXT: mul t0, a0, t0
-; RV32IM-NEXT: mul t4, a0, t4
-; RV32IM-NEXT: mul a0, a0, a4
-; RV32IM-NEXT: xor a4, t1, t5
-; RV32IM-NEXT: xor t1, s5, s6
-; RV32IM-NEXT: xor t2, s1, t2
-; RV32IM-NEXT: xor a2, a7, a2
+; RV32IM-NEXT: xor s0, s6, s0
; RV32IM-NEXT: xor a1, a3, a1
-; RV32IM-NEXT: xor a0, a0, a5
-; RV32IM-NEXT: xor a3, a4, s2
-; RV32IM-NEXT: xor a4, t1, s7
-; RV32IM-NEXT: xor a5, t2, t3
-; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: xor a0, a0, a6
-; RV32IM-NEXT: xor a2, a3, s3
-; RV32IM-NEXT: xor a3, a4, s8
-; RV32IM-NEXT: xor a5, a5, s0
+; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, t5, a3
+; RV32IM-NEXT: xor t3, s2, t6
+; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: xor a2, a2, a5
+; RV32IM-NEXT: mul a5, a0, s9
+; RV32IM-NEXT: xor a4, a4, a5
+; RV32IM-NEXT: mul a5, a0, s1
+; RV32IM-NEXT: xor a5, s0, a5
+; RV32IM-NEXT: xor a1, a1, a3
+; RV32IM-NEXT: xor a3, t3, t4
+; RV32IM-NEXT: mul a6, a0, a6
+; RV32IM-NEXT: xor a2, a2, a6
+; RV32IM-NEXT: mul a6, a0, s10
+; RV32IM-NEXT: xor a4, a4, a6
+; RV32IM-NEXT: mul a6, a0, s5
+; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: xor a1, a1, a3
+; RV32IM-NEXT: mul a3, a0, t1
+; RV32IM-NEXT: xor a2, a2, a3
+; RV32IM-NEXT: mul a3, a0, s7
+; RV32IM-NEXT: xor a3, a4, a3
+; RV32IM-NEXT: mul a4, a0, s11
+; RV32IM-NEXT: xor a4, a5, a4
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: mul a2, a0, t2
+; RV32IM-NEXT: xor a2, a3, a2
+; RV32IM-NEXT: mul a3, a0, ra
+; RV32IM-NEXT: xor a3, a4, a3
+; RV32IM-NEXT: mul a4, a0, t0
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: xor a1, a1, a3
+; RV32IM-NEXT: mul a0, a0, a7
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: xor a1, a2, s4
-; RV32IM-NEXT: xor a2, a3, s9
-; RV32IM-NEXT: xor a3, a5, t6
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: xor a1, a2, s10
-; RV32IM-NEXT: xor a2, a3, t0
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: xor a1, a2, t4
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s9, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s10, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 48
+; RV32IM-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s2, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s3, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s4, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s6, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s7, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s8, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s9, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s11, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 80
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: clmul_i32:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: addi sp, sp, -96
-; RV64IM-NEXT: sd s0, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s1, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s2, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s3, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s4, 56(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s5, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s6, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s7, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s8, 24(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s9, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s10, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi t6, a1, 2
-; RV64IM-NEXT: andi s1, a1, 1
-; RV64IM-NEXT: andi a7, a1, 4
-; RV64IM-NEXT: andi t2, a1, 8
-; RV64IM-NEXT: andi t0, a1, 16
-; RV64IM-NEXT: andi t3, a1, 32
-; RV64IM-NEXT: andi a2, a1, 64
-; RV64IM-NEXT: andi t4, a1, 128
-; RV64IM-NEXT: andi s0, a1, 256
-; RV64IM-NEXT: andi a3, a1, 512
-; RV64IM-NEXT: li a4, 1
-; RV64IM-NEXT: lui a5, 1
-; RV64IM-NEXT: lui a6, 2
-; RV64IM-NEXT: lui t1, 4
-; RV64IM-NEXT: lui t5, 8
-; RV64IM-NEXT: lui s2, 16
-; RV64IM-NEXT: lui s3, 32
-; RV64IM-NEXT: lui s4, 64
-; RV64IM-NEXT: lui s5, 128
-; RV64IM-NEXT: lui s6, 256
+; RV64IM-NEXT: addi sp, sp, -144
+; RV64IM-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s2, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s3, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s4, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s5, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s6, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s7, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s8, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s9, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s10, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s11, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a2, a1, 62
+; RV64IM-NEXT: slliw s10, a0, 1
+; RV64IM-NEXT: slli a3, a1, 61
+; RV64IM-NEXT: slliw s11, a0, 2
+; RV64IM-NEXT: slli a6, a1, 60
+; RV64IM-NEXT: slliw s8, a0, 3
+; RV64IM-NEXT: slli t1, a1, 59
+; RV64IM-NEXT: slliw s9, a0, 4
+; RV64IM-NEXT: slli t4, a1, 58
+; RV64IM-NEXT: slliw s7, a0, 5
+; RV64IM-NEXT: slli t3, a1, 57
+; RV64IM-NEXT: slliw s5, a0, 6
+; RV64IM-NEXT: slli s4, a1, 56
+; RV64IM-NEXT: slliw s6, a0, 7
+; RV64IM-NEXT: slli t6, a1, 55
+; RV64IM-NEXT: slliw s2, a0, 8
+; RV64IM-NEXT: slli s0, a1, 54
+; RV64IM-NEXT: slliw s3, a0, 9
+; RV64IM-NEXT: slli t5, a1, 53
+; RV64IM-NEXT: slliw s1, a0, 10
+; RV64IM-NEXT: li a5, 1
+; RV64IM-NEXT: lui t2, 1
+; RV64IM-NEXT: lui a7, 4
+; RV64IM-NEXT: lui ra, 16
+; RV64IM-NEXT: srli a2, a2, 63
+; RV64IM-NEXT: neg a2, a2
+; RV64IM-NEXT: and a2, a2, s10
+; RV64IM-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a4, 32
+; RV64IM-NEXT: srli a3, a3, 63
+; RV64IM-NEXT: neg a3, a3
+; RV64IM-NEXT: and a2, a3, s11
+; RV64IM-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui t0, 64
+; RV64IM-NEXT: srli a6, a6, 63
+; RV64IM-NEXT: neg a6, a6
+; RV64IM-NEXT: and a2, a6, s8
+; RV64IM-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s8, 128
+; RV64IM-NEXT: srli t1, t1, 63
+; RV64IM-NEXT: neg t1, t1
+; RV64IM-NEXT: and a2, t1, s9
+; RV64IM-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s9, 256
+; RV64IM-NEXT: srli t4, t4, 63
+; RV64IM-NEXT: neg t4, t4
+; RV64IM-NEXT: and t4, t4, s7
; RV64IM-NEXT: lui s7, 512
-; RV64IM-NEXT: lui s8, 1024
-; RV64IM-NEXT: lui s9, 2048
-; RV64IM-NEXT: lui s10, 4096
-; RV64IM-NEXT: mulw t6, a0, t6
-; RV64IM-NEXT: mulw s1, a0, s1
-; RV64IM-NEXT: xor t6, s1, t6
-; RV64IM-NEXT: lui s1, 8192
-; RV64IM-NEXT: mulw a7, a0, a7
-; RV64IM-NEXT: mulw t2, a0, t2
-; RV64IM-NEXT: xor a7, a7, t2
-; RV64IM-NEXT: lui t2, 16384
-; RV64IM-NEXT: mulw t0, a0, t0
-; RV64IM-NEXT: mulw t3, a0, t3
-; RV64IM-NEXT: xor t0, t0, t3
-; RV64IM-NEXT: lui t3, 32768
-; RV64IM-NEXT: mulw t4, a0, t4
-; RV64IM-NEXT: mulw s0, a0, s0
-; RV64IM-NEXT: xor t4, t4, s0
-; RV64IM-NEXT: lui s0, 65536
-; RV64IM-NEXT: xor a7, t6, a7
-; RV64IM-NEXT: lui t6, 131072
-; RV64IM-NEXT: mulw a2, a0, a2
-; RV64IM-NEXT: xor a2, t0, a2
-; RV64IM-NEXT: lui t0, 262144
-; RV64IM-NEXT: mulw a3, a0, a3
-; RV64IM-NEXT: xor a3, t4, a3
-; RV64IM-NEXT: lui t4, 524288
-; RV64IM-NEXT: slli a4, a4, 11
+; RV64IM-NEXT: srli t3, t3, 63
+; RV64IM-NEXT: neg t3, t3
+; RV64IM-NEXT: and a2, t3, s5
+; RV64IM-NEXT: sd a2, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s5, 1024
+; RV64IM-NEXT: srli s4, s4, 63
+; RV64IM-NEXT: neg s4, s4
+; RV64IM-NEXT: and s4, s4, s6
+; RV64IM-NEXT: lui s6, 2048
+; RV64IM-NEXT: srli t6, t6, 63
+; RV64IM-NEXT: neg t6, t6
+; RV64IM-NEXT: and s2, t6, s2
+; RV64IM-NEXT: lui s11, 4096
+; RV64IM-NEXT: srli s0, s0, 63
+; RV64IM-NEXT: neg t6, s0
+; RV64IM-NEXT: and t6, t6, s3
+; RV64IM-NEXT: lui s0, 8192
+; RV64IM-NEXT: srli t5, t5, 63
+; RV64IM-NEXT: neg t5, t5
+; RV64IM-NEXT: and t5, t5, s1
+; RV64IM-NEXT: lui s1, 16384
+; RV64IM-NEXT: and s3, a1, ra
+; RV64IM-NEXT: and a4, a1, a4
+; RV64IM-NEXT: mulw s3, a0, s3
+; RV64IM-NEXT: mulw a4, a0, a4
+; RV64IM-NEXT: xor s3, s3, a4
+; RV64IM-NEXT: lui ra, 32768
+; RV64IM-NEXT: and a4, a1, s6
+; RV64IM-NEXT: and s6, a1, s11
+; RV64IM-NEXT: mulw a4, a0, a4
+; RV64IM-NEXT: mulw s6, a0, s6
+; RV64IM-NEXT: xor s6, a4, s6
+; RV64IM-NEXT: lui s11, 65536
+; RV64IM-NEXT: slli a2, a5, 11
+; RV64IM-NEXT: and a4, a1, t2
+; RV64IM-NEXT: lui a5, 2
; RV64IM-NEXT: and a5, a1, a5
-; RV64IM-NEXT: and a6, a1, a6
-; RV64IM-NEXT: and t1, a1, t1
-; RV64IM-NEXT: and t5, a1, t5
-; RV64IM-NEXT: and s2, a1, s2
-; RV64IM-NEXT: and s3, a1, s3
-; RV64IM-NEXT: and s4, a1, s4
-; RV64IM-NEXT: and s5, a1, s5
-; RV64IM-NEXT: and s6, a1, s6
-; RV64IM-NEXT: and s7, a1, s7
+; RV64IM-NEXT: and a6, a1, a7
+; RV64IM-NEXT: lui a3, 8
+; RV64IM-NEXT: and t1, a1, a3
+; RV64IM-NEXT: and t0, a1, t0
; RV64IM-NEXT: and s8, a1, s8
; RV64IM-NEXT: and s9, a1, s9
-; RV64IM-NEXT: and s10, a1, s10
+; RV64IM-NEXT: and s7, a1, s7
+; RV64IM-NEXT: and s5, a1, s5
+; RV64IM-NEXT: and s0, a1, s0
; RV64IM-NEXT: and s1, a1, s1
+; RV64IM-NEXT: and ra, a1, ra
+; RV64IM-NEXT: and s11, a1, s11
+; RV64IM-NEXT: lui s10, 131072
+; RV64IM-NEXT: and s10, a1, s10
+; RV64IM-NEXT: lui t2, 262144
; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: and t3, a1, t3
-; RV64IM-NEXT: and s0, a1, s0
-; RV64IM-NEXT: and t6, a1, t6
-; RV64IM-NEXT: and t0, a1, t0
-; RV64IM-NEXT: and t4, a1, t4
-; RV64IM-NEXT: and a4, a1, a4
-; RV64IM-NEXT: andi a1, a1, 1024
-; RV64IM-NEXT: mulw a1, a0, a1
+; RV64IM-NEXT: lui a7, 524288
+; RV64IM-NEXT: and a7, a1, a7
+; RV64IM-NEXT: and a2, a1, a2
+; RV64IM-NEXT: slli a1, a1, 63
+; RV64IM-NEXT: srai a1, a1, 63
+; RV64IM-NEXT: and a1, a1, a0
+; RV64IM-NEXT: ld a3, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a1, a3
+; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t3, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, t3
+; RV64IM-NEXT: ld t3, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t3, t4
+; RV64IM-NEXT: xor s2, s4, s2
+; RV64IM-NEXT: mulw a4, a0, a4
+; RV64IM-NEXT: mulw a2, a0, a2
+; RV64IM-NEXT: xor a2, a2, a4
+; RV64IM-NEXT: mulw a4, a0, t0
+; RV64IM-NEXT: xor a4, s3, a4
+; RV64IM-NEXT: mulw t0, a0, s0
+; RV64IM-NEXT: xor t0, s6, t0
+; RV64IM-NEXT: xor a1, a3, a1
+; RV64IM-NEXT: ld a3, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, t4, a3
+; RV64IM-NEXT: xor t3, s2, t6
; RV64IM-NEXT: mulw a5, a0, a5
+; RV64IM-NEXT: xor a2, a2, a5
+; RV64IM-NEXT: mulw a5, a0, s8
+; RV64IM-NEXT: xor a4, a4, a5
+; RV64IM-NEXT: mulw a5, a0, s1
+; RV64IM-NEXT: xor a5, t0, a5
+; RV64IM-NEXT: xor a1, a1, a3
+; RV64IM-NEXT: xor a3, t3, t5
; RV64IM-NEXT: mulw a6, a0, a6
-; RV64IM-NEXT: mulw t1, a0, t1
-; RV64IM-NEXT: mulw t5, a0, t5
-; RV64IM-NEXT: mulw s2, a0, s2
-; RV64IM-NEXT: mulw s3, a0, s3
-; RV64IM-NEXT: mulw s4, a0, s4
-; RV64IM-NEXT: mulw s5, a0, s5
-; RV64IM-NEXT: mulw s6, a0, s6
-; RV64IM-NEXT: mulw s7, a0, s7
-; RV64IM-NEXT: mulw s8, a0, s8
-; RV64IM-NEXT: mulw s9, a0, s9
-; RV64IM-NEXT: mulw s10, a0, s10
-; RV64IM-NEXT: mulw s1, a0, s1
-; RV64IM-NEXT: mulw t2, a0, t2
-; RV64IM-NEXT: mulw t3, a0, t3
-; RV64IM-NEXT: mulw s0, a0, s0
-; RV64IM-NEXT: mulw t6, a0, t6
-; RV64IM-NEXT: mulw t0, a0, t0
-; RV64IM-NEXT: mulw t4, a0, t4
-; RV64IM-NEXT: mulw a0, a0, a4
-; RV64IM-NEXT: xor a4, t1, t5
-; RV64IM-NEXT: xor t1, s5, s6
-; RV64IM-NEXT: xor t2, s1, t2
-; RV64IM-NEXT: xor a2, a7, a2
-; RV64IM-NEXT: xor a1, a3, a1
-; RV64IM-NEXT: xor a0, a0, a5
-; RV64IM-NEXT: xor a3, a4, s2
-; RV64IM-NEXT: xor a4, t1, s7
-; RV64IM-NEXT: xor a5, t2, t3
-; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: xor a0, a0, a6
-; RV64IM-NEXT: xor a2, a3, s3
-; RV64IM-NEXT: xor a3, a4, s8
-; RV64IM-NEXT: xor a5, a5, s0
-; RV64IM-NEXT: xor a0, a1, a0
-; RV64IM-NEXT: xor a1, a2, s4
-; RV64IM-NEXT: xor a2, a3, s9
-; RV64IM-NEXT: xor a3, a5, t6
-; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: xor a1, a2, s10
-; RV64IM-NEXT: xor a2, a3, t0
-; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: xor a1, a2, t4
-; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld s0, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s1, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s2, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s3, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s4, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s5, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s6, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s7, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s8, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s9, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s10, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: addi sp, sp, 96
-; RV64IM-NEXT: ret
-;
-; RV32IMZBS-LABEL: clmul_i32:
-; RV32IMZBS: # %bb.0:
-; RV32IMZBS-NEXT: addi sp, sp, -64
-; RV32IMZBS-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s2, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s3, 44(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s4, 40(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s5, 36(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s6, 32(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s7, 28(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s9, 20(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s10, 16(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s11, 12(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a1, 2
-; RV32IMZBS-NEXT: andi t2, a1, 1
-; RV32IMZBS-NEXT: andi a4, a1, 4
-; RV32IMZBS-NEXT: andi a6, a1, 8
-; RV32IMZBS-NEXT: andi a3, a1, 16
-; RV32IMZBS-NEXT: andi a5, a1, 32
-; RV32IMZBS-NEXT: andi a2, a1, 64
-; RV32IMZBS-NEXT: sw a2, 8(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a7, a1, 128
-; RV32IMZBS-NEXT: andi t1, a1, 256
-; RV32IMZBS-NEXT: bseti t3, zero, 11
-; RV32IMZBS-NEXT: lui t4, 1
-; RV32IMZBS-NEXT: lui t5, 2
-; RV32IMZBS-NEXT: lui t6, 4
-; RV32IMZBS-NEXT: lui s0, 8
-; RV32IMZBS-NEXT: lui s1, 16
-; RV32IMZBS-NEXT: lui s2, 32
-; RV32IMZBS-NEXT: lui s3, 64
-; RV32IMZBS-NEXT: lui s4, 128
-; RV32IMZBS-NEXT: lui s5, 256
-; RV32IMZBS-NEXT: lui s6, 512
-; RV32IMZBS-NEXT: lui s7, 1024
-; RV32IMZBS-NEXT: lui s8, 2048
-; RV32IMZBS-NEXT: lui s9, 4096
-; RV32IMZBS-NEXT: lui s10, 8192
-; RV32IMZBS-NEXT: lui s11, 16384
-; RV32IMZBS-NEXT: lui ra, 32768
-; RV32IMZBS-NEXT: mul t0, a0, t0
-; RV32IMZBS-NEXT: mul t2, a0, t2
-; RV32IMZBS-NEXT: xor a2, t2, t0
-; RV32IMZBS-NEXT: sw a2, 4(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t2, 65536
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: mul a6, a0, a6
-; RV32IMZBS-NEXT: xor t0, a4, a6
-; RV32IMZBS-NEXT: lui a6, 131072
-; RV32IMZBS-NEXT: mul a3, a0, a3
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: xor a4, a3, a5
-; RV32IMZBS-NEXT: lui a5, 262144
-; RV32IMZBS-NEXT: mul a7, a0, a7
-; RV32IMZBS-NEXT: mul t1, a0, t1
-; RV32IMZBS-NEXT: xor a7, a7, t1
-; RV32IMZBS-NEXT: lui t1, 524288
-; RV32IMZBS-NEXT: and t3, a1, t3
-; RV32IMZBS-NEXT: and t4, a1, t4
-; RV32IMZBS-NEXT: and t5, a1, t5
-; RV32IMZBS-NEXT: and t6, a1, t6
-; RV32IMZBS-NEXT: and s0, a1, s0
-; RV32IMZBS-NEXT: and s1, a1, s1
-; RV32IMZBS-NEXT: and s2, a1, s2
-; RV32IMZBS-NEXT: and s3, a1, s3
-; RV32IMZBS-NEXT: and s4, a1, s4
-; RV32IMZBS-NEXT: and s5, a1, s5
-; RV32IMZBS-NEXT: and s6, a1, s6
-; RV32IMZBS-NEXT: and s7, a1, s7
-; RV32IMZBS-NEXT: and s8, a1, s8
-; RV32IMZBS-NEXT: and s9, a1, s9
-; RV32IMZBS-NEXT: and s10, a1, s10
-; RV32IMZBS-NEXT: and s11, a1, s11
-; RV32IMZBS-NEXT: and ra, a1, ra
-; RV32IMZBS-NEXT: and t2, a1, t2
-; RV32IMZBS-NEXT: and a6, a1, a6
-; RV32IMZBS-NEXT: and a5, a1, a5
-; RV32IMZBS-NEXT: and t1, a1, t1
-; RV32IMZBS-NEXT: andi a2, a1, 512
-; RV32IMZBS-NEXT: andi a1, a1, 1024
-; RV32IMZBS-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a0, a3
-; RV32IMZBS-NEXT: mul a2, a0, a2
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: mul t3, a0, t3
-; RV32IMZBS-NEXT: mul t4, a0, t4
-; RV32IMZBS-NEXT: mul t5, a0, t5
-; RV32IMZBS-NEXT: mul t6, a0, t6
-; RV32IMZBS-NEXT: mul s0, a0, s0
-; RV32IMZBS-NEXT: mul s1, a0, s1
-; RV32IMZBS-NEXT: mul s2, a0, s2
-; RV32IMZBS-NEXT: mul s3, a0, s3
-; RV32IMZBS-NEXT: mul s4, a0, s4
-; RV32IMZBS-NEXT: mul s5, a0, s5
-; RV32IMZBS-NEXT: mul s6, a0, s6
-; RV32IMZBS-NEXT: mul s7, a0, s7
-; RV32IMZBS-NEXT: mul s8, a0, s8
-; RV32IMZBS-NEXT: mul s9, a0, s9
-; RV32IMZBS-NEXT: mul s10, a0, s10
-; RV32IMZBS-NEXT: mul s11, a0, s11
-; RV32IMZBS-NEXT: mul ra, a0, ra
-; RV32IMZBS-NEXT: mul t2, a0, t2
-; RV32IMZBS-NEXT: mul a6, a0, a6
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: mul a0, a0, t1
-; RV32IMZBS-NEXT: lw t1, 4(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t1, t0
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: xor a2, a7, a2
-; RV32IMZBS-NEXT: xor a4, t3, t4
-; RV32IMZBS-NEXT: xor s0, s0, s1
-; RV32IMZBS-NEXT: xor a7, s5, s6
-; RV32IMZBS-NEXT: xor t1, s11, ra
-; RV32IMZBS-NEXT: xor a3, t0, a3
-; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: xor a2, a4, t5
-; RV32IMZBS-NEXT: xor a4, s0, s2
-; RV32IMZBS-NEXT: xor a7, a7, s7
-; RV32IMZBS-NEXT: xor t0, t1, t2
-; RV32IMZBS-NEXT: xor a1, a3, a1
-; RV32IMZBS-NEXT: xor a2, a2, t6
-; RV32IMZBS-NEXT: xor a3, a4, s3
-; RV32IMZBS-NEXT: xor a4, a7, s8
-; RV32IMZBS-NEXT: xor a6, t0, a6
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: xor a2, a3, s4
-; RV32IMZBS-NEXT: xor a3, a4, s9
-; RV32IMZBS-NEXT: xor a4, a6, a5
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: xor a2, a3, s10
-; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: xor a2, a2, a6
+; RV64IM-NEXT: mulw a6, a0, s9
+; RV64IM-NEXT: xor a4, a4, a6
+; RV64IM-NEXT: mulw a6, a0, ra
+; RV64IM-NEXT: xor a5, a5, a6
+; RV64IM-NEXT: xor a1, a1, a3
+; RV64IM-NEXT: mulw a3, a0, t1
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: mulw a3, a0, s7
+; RV64IM-NEXT: xor a3, a4, a3
+; RV64IM-NEXT: mulw a4, a0, s11
+; RV64IM-NEXT: xor a4, a5, a4
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: mulw a2, a0, s5
+; RV64IM-NEXT: xor a2, a3, a2
+; RV64IM-NEXT: mulw a3, a0, s10
+; RV64IM-NEXT: xor a3, a4, a3
+; RV64IM-NEXT: mulw a4, a0, t2
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: xor a3, a3, a4
+; RV64IM-NEXT: xor a1, a1, a3
+; RV64IM-NEXT: mulw a0, a0, a7
+; RV64IM-NEXT: xor a0, a1, a0
+; RV64IM-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s2, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s3, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s4, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s5, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s6, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s7, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s8, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s9, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s10, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 144
+; RV64IM-NEXT: ret
+;
+; RV32IMZBS-LABEL: clmul_i32:
+; RV32IMZBS: # %bb.0:
+; RV32IMZBS-NEXT: addi sp, sp, -80
+; RV32IMZBS-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s1, 68(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s2, 64(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s3, 60(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s4, 56(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s5, 52(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s6, 48(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s7, 44(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s8, 40(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s9, 36(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: mv a6, a0
+; RV32IMZBS-NEXT: slli a3, a0, 1
+; RV32IMZBS-NEXT: andi s11, a1, 2
+; RV32IMZBS-NEXT: slli a0, a0, 2
+; RV32IMZBS-NEXT: andi ra, a1, 4
+; RV32IMZBS-NEXT: slli a5, a6, 3
+; RV32IMZBS-NEXT: andi s10, a1, 8
+; RV32IMZBS-NEXT: slli a4, a6, 4
+; RV32IMZBS-NEXT: andi s9, a1, 16
+; RV32IMZBS-NEXT: slli a7, a6, 5
+; RV32IMZBS-NEXT: andi s8, a1, 32
+; RV32IMZBS-NEXT: slli t0, a6, 6
+; RV32IMZBS-NEXT: andi s6, a1, 64
+; RV32IMZBS-NEXT: slli t1, a6, 7
+; RV32IMZBS-NEXT: andi s4, a1, 128
+; RV32IMZBS-NEXT: slli t2, a6, 8
+; RV32IMZBS-NEXT: andi s1, a1, 256
+; RV32IMZBS-NEXT: slli t3, a6, 9
+; RV32IMZBS-NEXT: andi s0, a1, 512
+; RV32IMZBS-NEXT: slli t4, a6, 10
+; RV32IMZBS-NEXT: andi t5, a1, 1024
+; RV32IMZBS-NEXT: slli t6, a6, 11
+; RV32IMZBS-NEXT: not a2, a1
+; RV32IMZBS-NEXT: slli s2, a6, 12
+; RV32IMZBS-NEXT: slli s3, a6, 13
+; RV32IMZBS-NEXT: slli s5, a6, 14
+; RV32IMZBS-NEXT: slli s7, a6, 15
+; RV32IMZBS-NEXT: seqz s11, s11
+; RV32IMZBS-NEXT: addi s11, s11, -1
+; RV32IMZBS-NEXT: and a3, s11, a3
+; RV32IMZBS-NEXT: sw a3, 24(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s11, a6, 16
+; RV32IMZBS-NEXT: seqz a3, ra
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a0, a3, a0
+; RV32IMZBS-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli ra, a6, 17
+; RV32IMZBS-NEXT: seqz a3, s10
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 16(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s10, a6, 18
+; RV32IMZBS-NEXT: seqz a3, s9
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 12(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s9, a6, 19
+; RV32IMZBS-NEXT: seqz a3, s8
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a0, a3, a7
+; RV32IMZBS-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, a6, 20
+; RV32IMZBS-NEXT: seqz s6, s6
+; RV32IMZBS-NEXT: addi s6, s6, -1
+; RV32IMZBS-NEXT: and a0, s6, t0
+; RV32IMZBS-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s6, a6, 21
+; RV32IMZBS-NEXT: seqz s4, s4
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: and a0, s4, t1
+; RV32IMZBS-NEXT: sw a0, 0(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s4, a6, 22
+; RV32IMZBS-NEXT: seqz s1, s1
+; RV32IMZBS-NEXT: addi s1, s1, -1
+; RV32IMZBS-NEXT: and t2, s1, t2
+; RV32IMZBS-NEXT: slli a0, a6, 23
+; RV32IMZBS-NEXT: seqz s0, s0
+; RV32IMZBS-NEXT: addi s0, s0, -1
+; RV32IMZBS-NEXT: and t3, s0, t3
+; RV32IMZBS-NEXT: slli a3, a6, 24
+; RV32IMZBS-NEXT: seqz t5, t5
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and t4, t5, t4
+; RV32IMZBS-NEXT: bexti t5, a2, 11
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and s1, t5, t6
+; RV32IMZBS-NEXT: bexti t5, a2, 12
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and s2, t5, s2
+; RV32IMZBS-NEXT: bexti t5, a2, 13
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and s0, t5, s3
+; RV32IMZBS-NEXT: bexti t5, a2, 14
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and t6, t5, s5
+; RV32IMZBS-NEXT: bexti t5, a2, 15
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and t5, t5, s7
+; RV32IMZBS-NEXT: bexti s3, a2, 16
+; RV32IMZBS-NEXT: addi s3, s3, -1
+; RV32IMZBS-NEXT: and s7, s3, s11
+; RV32IMZBS-NEXT: bexti s3, a2, 17
+; RV32IMZBS-NEXT: addi s3, s3, -1
+; RV32IMZBS-NEXT: and s8, s3, ra
+; RV32IMZBS-NEXT: bexti s3, a2, 18
+; RV32IMZBS-NEXT: addi s3, s3, -1
+; RV32IMZBS-NEXT: and s10, s3, s10
+; RV32IMZBS-NEXT: bexti s3, a2, 19
+; RV32IMZBS-NEXT: addi s3, s3, -1
+; RV32IMZBS-NEXT: and s5, s3, s9
+; RV32IMZBS-NEXT: bexti s3, a2, 20
+; RV32IMZBS-NEXT: addi s3, s3, -1
+; RV32IMZBS-NEXT: and s3, s3, a5
+; RV32IMZBS-NEXT: bexti a5, a2, 21
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and ra, a5, s6
+; RV32IMZBS-NEXT: bexti a5, a2, 22
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and s6, a5, s4
+; RV32IMZBS-NEXT: bexti a5, a2, 23
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a7, a5, a0
+; RV32IMZBS-NEXT: bexti a5, a2, 24
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and t0, a5, a3
+; RV32IMZBS-NEXT: bexti a5, a2, 25
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli s4, a6, 25
+; RV32IMZBS-NEXT: and s9, a5, s4
+; RV32IMZBS-NEXT: bexti a5, a2, 26
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli s4, a6, 26
+; RV32IMZBS-NEXT: and s11, a5, s4
+; RV32IMZBS-NEXT: bexti a5, a2, 27
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli s4, a6, 27
+; RV32IMZBS-NEXT: and a5, a5, s4
+; RV32IMZBS-NEXT: bexti s4, a2, 28
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: slli a0, a6, 28
+; RV32IMZBS-NEXT: and a3, s4, a0
+; RV32IMZBS-NEXT: bexti s4, a2, 29
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: slli a4, a6, 29
+; RV32IMZBS-NEXT: and a4, s4, a4
+; RV32IMZBS-NEXT: bexti a2, a2, 30
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli s4, a6, 30
+; RV32IMZBS-NEXT: and a2, a2, s4
+; RV32IMZBS-NEXT: andi s4, a1, 1
+; RV32IMZBS-NEXT: seqz s4, s4
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: and s4, s4, a6
+; RV32IMZBS-NEXT: slli a6, a6, 31
+; RV32IMZBS-NEXT: srli a1, a1, 31
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a1, a1, a6
+; RV32IMZBS-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, s4, a0
+; RV32IMZBS-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s4, 16(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, a0, s4
+; RV32IMZBS-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t1, 8(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, t1
+; RV32IMZBS-NEXT: lw t1, 0(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t1, t2
+; RV32IMZBS-NEXT: xor t2, s1, s2
+; RV32IMZBS-NEXT: xor s1, s7, s8
+; RV32IMZBS-NEXT: xor a7, s6, a7
+; RV32IMZBS-NEXT: xor a2, a4, a2
+; RV32IMZBS-NEXT: xor a4, a6, s4
+; RV32IMZBS-NEXT: lw a6, 4(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a6
+; RV32IMZBS-NEXT: xor a6, t1, t3
+; RV32IMZBS-NEXT: xor t1, t2, s0
+; RV32IMZBS-NEXT: xor t2, s1, s10
+; RV32IMZBS-NEXT: xor a7, a7, t0
; RV32IMZBS-NEXT: xor a0, a4, a0
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s3, 44(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s4, 40(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s5, 36(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 32(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 28(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s8, 24(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s9, 20(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s10, 16(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s11, 12(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: addi sp, sp, 64
+; RV32IMZBS-NEXT: xor a4, a6, t4
+; RV32IMZBS-NEXT: xor a6, t1, t6
+; RV32IMZBS-NEXT: xor t0, t2, s5
+; RV32IMZBS-NEXT: xor a7, a7, s9
+; RV32IMZBS-NEXT: xor a0, a0, a4
+; RV32IMZBS-NEXT: xor a4, a6, t5
+; RV32IMZBS-NEXT: xor a6, t0, s3
+; RV32IMZBS-NEXT: xor a7, a7, s11
+; RV32IMZBS-NEXT: xor a0, a0, a4
+; RV32IMZBS-NEXT: xor a4, a6, ra
+; RV32IMZBS-NEXT: xor a5, a7, a5
+; RV32IMZBS-NEXT: xor a0, a0, a4
+; RV32IMZBS-NEXT: xor a3, a5, a3
+; RV32IMZBS-NEXT: xor a0, a0, a3
+; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 64(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s3, 60(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s4, 56(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s5, 52(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s6, 48(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 44(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 40(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 36(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s10, 32(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s11, 28(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: addi sp, sp, 80
; RV32IMZBS-NEXT: ret
;
; RV64IMZBS-LABEL: clmul_i32:
; RV64IMZBS: # %bb.0:
-; RV64IMZBS-NEXT: addi sp, sp, -128
-; RV64IMZBS-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi t0, a1, 2
-; RV64IMZBS-NEXT: andi t2, a1, 1
-; RV64IMZBS-NEXT: andi a4, a1, 4
-; RV64IMZBS-NEXT: andi a6, a1, 8
-; RV64IMZBS-NEXT: andi a3, a1, 16
-; RV64IMZBS-NEXT: andi a5, a1, 32
-; RV64IMZBS-NEXT: andi a2, a1, 64
-; RV64IMZBS-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a7, a1, 128
-; RV64IMZBS-NEXT: andi t1, a1, 256
-; RV64IMZBS-NEXT: bseti t3, zero, 11
-; RV64IMZBS-NEXT: lui t4, 1
-; RV64IMZBS-NEXT: lui t5, 2
-; RV64IMZBS-NEXT: lui t6, 4
-; RV64IMZBS-NEXT: lui s0, 8
-; RV64IMZBS-NEXT: lui s1, 16
-; RV64IMZBS-NEXT: lui s2, 32
-; RV64IMZBS-NEXT: lui s3, 64
-; RV64IMZBS-NEXT: lui s4, 128
-; RV64IMZBS-NEXT: lui s5, 256
-; RV64IMZBS-NEXT: lui s6, 512
-; RV64IMZBS-NEXT: lui s7, 1024
-; RV64IMZBS-NEXT: lui s8, 2048
-; RV64IMZBS-NEXT: lui s9, 4096
-; RV64IMZBS-NEXT: lui s10, 8192
-; RV64IMZBS-NEXT: lui s11, 16384
-; RV64IMZBS-NEXT: lui ra, 32768
-; RV64IMZBS-NEXT: mulw t0, a0, t0
-; RV64IMZBS-NEXT: mulw t2, a0, t2
-; RV64IMZBS-NEXT: xor a2, t2, t0
-; RV64IMZBS-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui t2, 65536
-; RV64IMZBS-NEXT: mulw a4, a0, a4
-; RV64IMZBS-NEXT: mulw a6, a0, a6
-; RV64IMZBS-NEXT: xor t0, a4, a6
-; RV64IMZBS-NEXT: lui a6, 131072
-; RV64IMZBS-NEXT: mulw a3, a0, a3
-; RV64IMZBS-NEXT: mulw a5, a0, a5
-; RV64IMZBS-NEXT: xor a4, a3, a5
-; RV64IMZBS-NEXT: lui a5, 262144
-; RV64IMZBS-NEXT: mulw a7, a0, a7
-; RV64IMZBS-NEXT: mulw t1, a0, t1
-; RV64IMZBS-NEXT: xor a7, a7, t1
-; RV64IMZBS-NEXT: lui t1, 524288
-; RV64IMZBS-NEXT: and t3, a1, t3
-; RV64IMZBS-NEXT: and t4, a1, t4
-; RV64IMZBS-NEXT: and t5, a1, t5
-; RV64IMZBS-NEXT: and t6, a1, t6
-; RV64IMZBS-NEXT: and s0, a1, s0
-; RV64IMZBS-NEXT: and s1, a1, s1
-; RV64IMZBS-NEXT: and s2, a1, s2
-; RV64IMZBS-NEXT: and s3, a1, s3
-; RV64IMZBS-NEXT: and s4, a1, s4
-; RV64IMZBS-NEXT: and s5, a1, s5
-; RV64IMZBS-NEXT: and s6, a1, s6
-; RV64IMZBS-NEXT: and s7, a1, s7
-; RV64IMZBS-NEXT: and s8, a1, s8
-; RV64IMZBS-NEXT: and s9, a1, s9
-; RV64IMZBS-NEXT: and s10, a1, s10
-; RV64IMZBS-NEXT: and s11, a1, s11
+; RV64IMZBS-NEXT: addi sp, sp, -176
+; RV64IMZBS-NEXT: sd ra, 168(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s0, 160(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 152(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 144(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s3, 136(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s4, 128(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s5, 120(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s6, 112(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s7, 104(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s8, 96(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s9, 88(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s10, 80(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s11, 72(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a1, 1
+; RV64IMZBS-NEXT: slliw a5, a0, 1
+; RV64IMZBS-NEXT: bexti a3, a1, 2
+; RV64IMZBS-NEXT: slliw a7, a0, 2
+; RV64IMZBS-NEXT: bexti a4, a1, 3
+; RV64IMZBS-NEXT: sd a4, 48(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slliw a4, a0, 3
+; RV64IMZBS-NEXT: bexti t1, a1, 4
+; RV64IMZBS-NEXT: slliw t4, a0, 4
+; RV64IMZBS-NEXT: bexti t2, a1, 5
+; RV64IMZBS-NEXT: slliw t5, a0, 5
+; RV64IMZBS-NEXT: bexti a6, a1, 6
+; RV64IMZBS-NEXT: slliw t3, a0, 6
+; RV64IMZBS-NEXT: bexti t6, a1, 7
+; RV64IMZBS-NEXT: slliw s1, a0, 7
+; RV64IMZBS-NEXT: bexti s0, a1, 8
+; RV64IMZBS-NEXT: slliw s2, a0, 8
+; RV64IMZBS-NEXT: bexti s3, a1, 9
+; RV64IMZBS-NEXT: slliw s5, a0, 9
+; RV64IMZBS-NEXT: bexti s4, a1, 10
+; RV64IMZBS-NEXT: slliw s6, a0, 10
+; RV64IMZBS-NEXT: bexti s8, a1, 11
+; RV64IMZBS-NEXT: slliw s11, a0, 11
+; RV64IMZBS-NEXT: bexti s7, a1, 12
+; RV64IMZBS-NEXT: slliw s9, a0, 12
+; RV64IMZBS-NEXT: bexti s10, a1, 13
+; RV64IMZBS-NEXT: slliw ra, a0, 13
+; RV64IMZBS-NEXT: neg a2, a2
+; RV64IMZBS-NEXT: and a2, a2, a5
+; RV64IMZBS-NEXT: sd a2, 64(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti t0, a1, 14
+; RV64IMZBS-NEXT: neg a3, a3
+; RV64IMZBS-NEXT: and a2, a3, a7
+; RV64IMZBS-NEXT: sd a2, 56(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slliw a5, a0, 14
+; RV64IMZBS-NEXT: ld a2, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: neg a3, a2
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a4, a1, 15
+; RV64IMZBS-NEXT: neg a3, t1
+; RV64IMZBS-NEXT: and a2, a3, t4
+; RV64IMZBS-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slliw t4, a0, 15
+; RV64IMZBS-NEXT: neg a7, t2
+; RV64IMZBS-NEXT: and a2, a7, t5
+; RV64IMZBS-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti t5, a1, 16
+; RV64IMZBS-NEXT: neg a6, a6
+; RV64IMZBS-NEXT: and a2, a6, t3
+; RV64IMZBS-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slliw a2, a0, 16
+; RV64IMZBS-NEXT: neg a3, t6
+; RV64IMZBS-NEXT: and a3, a3, s1
+; RV64IMZBS-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti t6, a1, 17
+; RV64IMZBS-NEXT: neg t1, s0
+; RV64IMZBS-NEXT: and t3, t1, s2
+; RV64IMZBS-NEXT: slliw s2, a0, 17
+; RV64IMZBS-NEXT: neg t1, s3
+; RV64IMZBS-NEXT: and a3, t1, s5
+; RV64IMZBS-NEXT: sd a3, 8(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti s3, a1, 18
+; RV64IMZBS-NEXT: neg t2, s4
+; RV64IMZBS-NEXT: and t2, t2, s6
+; RV64IMZBS-NEXT: slliw s5, a0, 18
+; RV64IMZBS-NEXT: neg s0, s8
+; RV64IMZBS-NEXT: and s11, s0, s11
+; RV64IMZBS-NEXT: bexti s8, a1, 19
+; RV64IMZBS-NEXT: neg s0, s7
+; RV64IMZBS-NEXT: and s1, s0, s9
+; RV64IMZBS-NEXT: slliw s0, a0, 19
+; RV64IMZBS-NEXT: neg s4, s10
+; RV64IMZBS-NEXT: and s9, s4, ra
+; RV64IMZBS-NEXT: bexti s10, a1, 20
+; RV64IMZBS-NEXT: neg t0, t0
+; RV64IMZBS-NEXT: and a7, t0, a5
+; RV64IMZBS-NEXT: slliw t0, a0, 20
+; RV64IMZBS-NEXT: neg a4, a4
+; RV64IMZBS-NEXT: and t4, a4, t4
+; RV64IMZBS-NEXT: bexti a4, a1, 21
+; RV64IMZBS-NEXT: neg t5, t5
+; RV64IMZBS-NEXT: and s4, t5, a2
+; RV64IMZBS-NEXT: slliw a2, a0, 21
+; RV64IMZBS-NEXT: neg t5, t6
+; RV64IMZBS-NEXT: and s7, t5, s2
+; RV64IMZBS-NEXT: bexti t5, a1, 22
+; RV64IMZBS-NEXT: neg t6, s3
+; RV64IMZBS-NEXT: and s6, t6, s5
+; RV64IMZBS-NEXT: slliw t6, a0, 22
+; RV64IMZBS-NEXT: neg s2, s8
+; RV64IMZBS-NEXT: and s5, s2, s0
+; RV64IMZBS-NEXT: bexti s0, a1, 23
+; RV64IMZBS-NEXT: neg s2, s10
+; RV64IMZBS-NEXT: and s2, s2, t0
+; RV64IMZBS-NEXT: slliw t0, a0, 23
+; RV64IMZBS-NEXT: neg a4, a4
+; RV64IMZBS-NEXT: and s3, a4, a2
+; RV64IMZBS-NEXT: bexti a2, a1, 24
+; RV64IMZBS-NEXT: neg a4, t5
+; RV64IMZBS-NEXT: and a5, a4, t6
+; RV64IMZBS-NEXT: slliw t5, a0, 24
+; RV64IMZBS-NEXT: neg t6, s0
+; RV64IMZBS-NEXT: and t0, t6, t0
+; RV64IMZBS-NEXT: bexti t6, a1, 25
+; RV64IMZBS-NEXT: neg a2, a2
+; RV64IMZBS-NEXT: and a4, a2, t5
+; RV64IMZBS-NEXT: slliw t5, a0, 25
+; RV64IMZBS-NEXT: neg t6, t6
+; RV64IMZBS-NEXT: and t5, t6, t5
+; RV64IMZBS-NEXT: bexti t6, a1, 26
+; RV64IMZBS-NEXT: neg t6, t6
+; RV64IMZBS-NEXT: slliw s0, a0, 26
+; RV64IMZBS-NEXT: and s8, t6, s0
+; RV64IMZBS-NEXT: bexti t6, a1, 27
+; RV64IMZBS-NEXT: neg t6, t6
+; RV64IMZBS-NEXT: slliw s0, a0, 27
+; RV64IMZBS-NEXT: and t6, t6, s0
+; RV64IMZBS-NEXT: bexti s0, a1, 28
+; RV64IMZBS-NEXT: neg s0, s0
+; RV64IMZBS-NEXT: slliw s10, a0, 28
+; RV64IMZBS-NEXT: and s0, s0, s10
+; RV64IMZBS-NEXT: bexti s10, a1, 29
+; RV64IMZBS-NEXT: neg s10, s10
+; RV64IMZBS-NEXT: slliw ra, a0, 29
+; RV64IMZBS-NEXT: and s10, s10, ra
+; RV64IMZBS-NEXT: bexti ra, a1, 30
+; RV64IMZBS-NEXT: neg ra, ra
+; RV64IMZBS-NEXT: slliw a6, a0, 30
+; RV64IMZBS-NEXT: and a6, ra, a6
+; RV64IMZBS-NEXT: lui ra, 524288
; RV64IMZBS-NEXT: and ra, a1, ra
-; RV64IMZBS-NEXT: and t2, a1, t2
-; RV64IMZBS-NEXT: and a6, a1, a6
-; RV64IMZBS-NEXT: and a5, a1, a5
-; RV64IMZBS-NEXT: and t1, a1, t1
-; RV64IMZBS-NEXT: andi a2, a1, 512
-; RV64IMZBS-NEXT: andi a1, a1, 1024
-; RV64IMZBS-NEXT: ld a3, 16(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mulw a3, a0, a3
-; RV64IMZBS-NEXT: mulw a2, a0, a2
-; RV64IMZBS-NEXT: mulw a1, a0, a1
-; RV64IMZBS-NEXT: mulw t3, a0, t3
-; RV64IMZBS-NEXT: mulw t4, a0, t4
-; RV64IMZBS-NEXT: mulw t5, a0, t5
-; RV64IMZBS-NEXT: mulw t6, a0, t6
-; RV64IMZBS-NEXT: mulw s0, a0, s0
-; RV64IMZBS-NEXT: mulw s1, a0, s1
-; RV64IMZBS-NEXT: mulw s2, a0, s2
-; RV64IMZBS-NEXT: mulw s3, a0, s3
-; RV64IMZBS-NEXT: mulw s4, a0, s4
-; RV64IMZBS-NEXT: mulw s5, a0, s5
-; RV64IMZBS-NEXT: mulw s6, a0, s6
-; RV64IMZBS-NEXT: mulw s7, a0, s7
-; RV64IMZBS-NEXT: mulw s8, a0, s8
-; RV64IMZBS-NEXT: mulw s9, a0, s9
-; RV64IMZBS-NEXT: mulw s10, a0, s10
-; RV64IMZBS-NEXT: mulw s11, a0, s11
-; RV64IMZBS-NEXT: mulw ra, a0, ra
-; RV64IMZBS-NEXT: mulw t2, a0, t2
-; RV64IMZBS-NEXT: mulw a6, a0, a6
-; RV64IMZBS-NEXT: mulw a5, a0, a5
-; RV64IMZBS-NEXT: mulw a0, a0, t1
-; RV64IMZBS-NEXT: ld t1, 8(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: xor a2, a7, a2
-; RV64IMZBS-NEXT: xor a4, t3, t4
-; RV64IMZBS-NEXT: xor s0, s0, s1
-; RV64IMZBS-NEXT: xor a7, s5, s6
-; RV64IMZBS-NEXT: xor t1, s11, ra
-; RV64IMZBS-NEXT: xor a3, t0, a3
-; RV64IMZBS-NEXT: xor a1, a2, a1
-; RV64IMZBS-NEXT: xor a2, a4, t5
-; RV64IMZBS-NEXT: xor a4, s0, s2
-; RV64IMZBS-NEXT: xor a7, a7, s7
-; RV64IMZBS-NEXT: xor t0, t1, t2
+; RV64IMZBS-NEXT: slli a1, a1, 63
+; RV64IMZBS-NEXT: srai a1, a1, 63
+; RV64IMZBS-NEXT: and a1, a1, a0
+; RV64IMZBS-NEXT: ld a3, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a1, a3
+; RV64IMZBS-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a1, a2
+; RV64IMZBS-NEXT: ld a2, 40(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t1, 24(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a2, t1
+; RV64IMZBS-NEXT: ld t1, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t1, t3
+; RV64IMZBS-NEXT: xor s1, s11, s1
+; RV64IMZBS-NEXT: xor s4, s4, s7
+; RV64IMZBS-NEXT: xor a5, a5, t0
+; RV64IMZBS-NEXT: xor a6, s10, a6
; RV64IMZBS-NEXT: xor a1, a3, a1
-; RV64IMZBS-NEXT: xor a2, a2, t6
-; RV64IMZBS-NEXT: xor a3, a4, s3
-; RV64IMZBS-NEXT: xor a4, a7, s8
-; RV64IMZBS-NEXT: xor a6, t0, a6
+; RV64IMZBS-NEXT: ld a3, 32(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a2, a3
+; RV64IMZBS-NEXT: ld a3, 8(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, t3, a3
+; RV64IMZBS-NEXT: xor t0, s1, s9
+; RV64IMZBS-NEXT: xor t1, s4, s6
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: xor a1, a1, a2
+; RV64IMZBS-NEXT: xor a2, a3, t2
+; RV64IMZBS-NEXT: xor a3, t0, a7
+; RV64IMZBS-NEXT: xor a5, t1, s5
+; RV64IMZBS-NEXT: xor a4, a4, t5
+; RV64IMZBS-NEXT: slliw a0, a0, 31
+; RV64IMZBS-NEXT: sext.w ra, ra
+; RV64IMZBS-NEXT: seqz a7, ra
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: and a0, a7, a0
; RV64IMZBS-NEXT: xor a1, a1, a2
-; RV64IMZBS-NEXT: xor a2, a3, s4
-; RV64IMZBS-NEXT: xor a3, a4, s9
-; RV64IMZBS-NEXT: xor a4, a6, a5
+; RV64IMZBS-NEXT: xor a2, a3, t4
+; RV64IMZBS-NEXT: xor a3, a5, s2
+; RV64IMZBS-NEXT: xor a4, a4, s8
; RV64IMZBS-NEXT: xor a1, a1, a2
-; RV64IMZBS-NEXT: xor a2, a3, s10
+; RV64IMZBS-NEXT: xor a2, a3, s3
+; RV64IMZBS-NEXT: xor a3, a4, t6
; RV64IMZBS-NEXT: xor a1, a1, a2
-; RV64IMZBS-NEXT: xor a0, a4, a0
+; RV64IMZBS-NEXT: xor a3, a3, s0
+; RV64IMZBS-NEXT: xor a1, a1, a3
+; RV64IMZBS-NEXT: xor a0, a6, a0
; RV64IMZBS-NEXT: xor a0, a1, a0
-; RV64IMZBS-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: addi sp, sp, 128
+; RV64IMZBS-NEXT: ld ra, 168(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 160(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 152(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 144(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 136(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s4, 128(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s5, 120(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s6, 112(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s7, 104(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s8, 96(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s9, 88(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s10, 80(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s11, 72(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 176
; RV64IMZBS-NEXT: ret
%res = call i32 @llvm.clmul.i32(i32 %a, i32 %b)
ret i32 %res
@@ -3304,827 +3979,970 @@ define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
;
; RV32IM-LABEL: clmul_i64:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -272
-; RV32IM-NEXT: sw ra, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s0, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s1, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s2, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s3, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s4, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s5, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s6, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s7, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s8, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s9, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s10, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s11, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mv t1, a1
-; RV32IM-NEXT: srli a7, a0, 8
-; RV32IM-NEXT: lui s11, 16
-; RV32IM-NEXT: srli t0, a0, 24
-; RV32IM-NEXT: srli a1, a2, 8
+; RV32IM-NEXT: addi sp, sp, -368
+; RV32IM-NEXT: sw ra, 364(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s0, 360(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 356(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s2, 352(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s3, 348(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s4, 344(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s5, 340(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s6, 336(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s7, 332(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s8, 328(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s9, 324(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s10, 320(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s11, 316(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mv s4, a2
+; RV32IM-NEXT: mv s7, a1
+; RV32IM-NEXT: mv a1, a0
+; RV32IM-NEXT: srli t0, a0, 8
+; RV32IM-NEXT: srli t1, a0, 24
+; RV32IM-NEXT: srli t3, a2, 8
; RV32IM-NEXT: srli t2, a2, 24
-; RV32IM-NEXT: andi t3, a2, 2
-; RV32IM-NEXT: andi t5, a2, 1
-; RV32IM-NEXT: andi t6, a2, 4
-; RV32IM-NEXT: andi s0, a2, 8
-; RV32IM-NEXT: andi s1, a2, 16
-; RV32IM-NEXT: andi s2, a2, 32
-; RV32IM-NEXT: andi t4, a2, 128
-; RV32IM-NEXT: andi s4, a2, 256
-; RV32IM-NEXT: andi a4, a3, 2
-; RV32IM-NEXT: andi a5, a3, 1
-; RV32IM-NEXT: andi s7, a3, 4
-; RV32IM-NEXT: andi s8, a3, 8
-; RV32IM-NEXT: mul a6, t1, t3
-; RV32IM-NEXT: mul s3, t1, t5
-; RV32IM-NEXT: mul s5, t1, t6
-; RV32IM-NEXT: mul s6, t1, s0
-; RV32IM-NEXT: mul s9, t1, s1
-; RV32IM-NEXT: xor a6, s3, a6
-; RV32IM-NEXT: mul s3, t1, s2
-; RV32IM-NEXT: xor s5, s5, s6
-; RV32IM-NEXT: mul s6, t1, t4
-; RV32IM-NEXT: xor s3, s9, s3
-; RV32IM-NEXT: mul s9, t1, s4
-; RV32IM-NEXT: xor s6, s6, s9
-; RV32IM-NEXT: andi s9, a3, 16
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: xor a4, a5, a4
-; RV32IM-NEXT: andi s10, a3, 32
-; RV32IM-NEXT: mul a5, a0, s7
-; RV32IM-NEXT: mul s7, a0, s8
-; RV32IM-NEXT: xor a5, a5, s7
-; RV32IM-NEXT: andi s8, a3, 128
-; RV32IM-NEXT: mul s7, a0, s9
-; RV32IM-NEXT: mul s9, a0, s10
-; RV32IM-NEXT: xor s7, s7, s9
-; RV32IM-NEXT: andi s9, a3, 256
-; RV32IM-NEXT: mul s8, a0, s8
-; RV32IM-NEXT: mul s9, a0, s9
-; RV32IM-NEXT: xor s8, s8, s9
-; RV32IM-NEXT: mul t3, a0, t3
-; RV32IM-NEXT: mul t5, a0, t5
-; RV32IM-NEXT: xor t5, t5, t3
-; RV32IM-NEXT: andi t3, a2, 64
-; RV32IM-NEXT: mul t6, a0, t6
-; RV32IM-NEXT: mul s0, a0, s0
-; RV32IM-NEXT: xor s0, t6, s0
-; RV32IM-NEXT: andi t6, a2, 512
-; RV32IM-NEXT: mul s1, a0, s1
-; RV32IM-NEXT: mul s2, a0, s2
-; RV32IM-NEXT: xor s1, s1, s2
-; RV32IM-NEXT: addi s9, s11, -256
-; RV32IM-NEXT: sw s9, 216(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t4, a0, t4
-; RV32IM-NEXT: mul s2, a0, s4
-; RV32IM-NEXT: xor t4, t4, s2
-; RV32IM-NEXT: mul s2, t1, t3
-; RV32IM-NEXT: and a7, a7, s9
-; RV32IM-NEXT: or a7, a7, t0
-; RV32IM-NEXT: sw a7, 212(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a7, t1, t6
-; RV32IM-NEXT: and t0, a1, s9
-; RV32IM-NEXT: or a1, t0, t2
-; RV32IM-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s9, 4
-; RV32IM-NEXT: and t2, a2, s9
-; RV32IM-NEXT: xor a1, a6, s5
-; RV32IM-NEXT: sw a1, 204(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s5, 8
-; RV32IM-NEXT: and s4, a2, s5
-; RV32IM-NEXT: xor a1, s3, s2
-; RV32IM-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a6, t1, t2
-; RV32IM-NEXT: xor a1, s6, a7
-; RV32IM-NEXT: sw a1, 196(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a7, t1, s4
-; RV32IM-NEXT: xor a1, a6, a7
-; RV32IM-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 256
-; RV32IM-NEXT: lui s6, 128
-; RV32IM-NEXT: and t0, a2, s6
-; RV32IM-NEXT: and a1, a2, a1
-; RV32IM-NEXT: lui s10, 256
-; RV32IM-NEXT: mul a6, t1, t0
-; RV32IM-NEXT: mul a7, t1, a1
-; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: sw a6, 188(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a6, 8192
-; RV32IM-NEXT: lui a7, 16384
-; RV32IM-NEXT: and a6, a2, a6
-; RV32IM-NEXT: lui s11, 8192
-; RV32IM-NEXT: and a7, a2, a7
-; RV32IM-NEXT: lui ra, 16384
-; RV32IM-NEXT: mul s2, t1, a6
-; RV32IM-NEXT: mul s3, t1, a7
-; RV32IM-NEXT: xor s2, s2, s3
-; RV32IM-NEXT: sw s2, 180(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: sw a4, 184(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a3, 64
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: xor a4, s7, a4
-; RV32IM-NEXT: sw a4, 176(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a3, 512
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: xor a4, s8, a4
-; RV32IM-NEXT: sw a4, 172(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a3, s9
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: and a5, a3, s5
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: sw a4, 168(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a3, s6
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: and a5, a3, s10
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: sw a4, 164(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a3, s11
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: and a5, a3, ra
-; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: slli a4, s7, 1
+; RV32IM-NEXT: andi a5, a2, 2
+; RV32IM-NEXT: slli a7, s7, 2
+; RV32IM-NEXT: andi s0, a2, 4
+; RV32IM-NEXT: slli a6, s7, 3
+; RV32IM-NEXT: andi t4, a2, 8
+; RV32IM-NEXT: slli t6, a0, 1
+; RV32IM-NEXT: andi s1, a3, 2
+; RV32IM-NEXT: lui a0, 16
+; RV32IM-NEXT: addi s5, a0, -256
+; RV32IM-NEXT: and s6, a2, a0
+; RV32IM-NEXT: lui s3, 16
+; RV32IM-NEXT: lui a0, 32
+; RV32IM-NEXT: and a2, a2, a0
+; RV32IM-NEXT: lui s8, 2048
+; RV32IM-NEXT: and s2, s4, s8
+; RV32IM-NEXT: lui s9, 4096
+; RV32IM-NEXT: and t5, s4, s9
+; RV32IM-NEXT: and s3, a3, s3
+; RV32IM-NEXT: and a0, a3, a0
+; RV32IM-NEXT: sw s5, 312(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, t0, s5
+; RV32IM-NEXT: and t3, t3, s5
+; RV32IM-NEXT: mul s5, s7, s6
+; RV32IM-NEXT: or t0, t0, t1
+; RV32IM-NEXT: sw t0, 308(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t0, s7, a2
+; RV32IM-NEXT: or t1, t3, t2
+; RV32IM-NEXT: sw t1, 304(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t1, s7, s2
+; RV32IM-NEXT: xor t0, s5, t0
+; RV32IM-NEXT: sw t0, 300(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t0, s7, t5
+; RV32IM-NEXT: xor t0, t1, t0
+; RV32IM-NEXT: sw t0, 296(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, a3, s8
+; RV32IM-NEXT: mul t1, a1, s3
+; RV32IM-NEXT: mul t2, a1, a0
+; RV32IM-NEXT: xor a0, t1, t2
+; RV32IM-NEXT: sw a0, 292(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t1, a3, s9
+; RV32IM-NEXT: mul t0, a1, t0
+; RV32IM-NEXT: mul t1, a1, t1
+; RV32IM-NEXT: xor a0, t0, t1
+; RV32IM-NEXT: sw a0, 288(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t0, a1, 2
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: seqz t1, s1
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: and a4, a5, a4
+; RV32IM-NEXT: sw a4, 276(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, t1, t6
+; RV32IM-NEXT: sw a0, 280(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a5, t6
+; RV32IM-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a4, a3, 4
+; RV32IM-NEXT: seqz a5, s0
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: and a0, a5, a7
+; RV32IM-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a4, t0
+; RV32IM-NEXT: sw a0, 264(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a5, t0
+; RV32IM-NEXT: sw a0, 268(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a4, a3, 8
+; RV32IM-NEXT: seqz a5, t4
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: and a0, a5, a6
+; RV32IM-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, a1, 3
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a5, a6
+; RV32IM-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a4, s4, 16
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: andi a5, a3, 16
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, s7, 4
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 228(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, a1, 4
+; RV32IM-NEXT: and a0, a5, a6
+; RV32IM-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 244(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a4, s4, 32
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: andi a5, a3, 32
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, s7, 5
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 212(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, a1, 5
+; RV32IM-NEXT: and a0, a5, a6
+; RV32IM-NEXT: sw a0, 216(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 220(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a4, s4, 64
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: andi a5, a3, 64
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, s7, 6
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 224(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, a1, 6
+; RV32IM-NEXT: and a0, a5, a6
+; RV32IM-NEXT: sw a0, 232(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a4, s4, 128
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: andi a5, a3, 128
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, s7, 7
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 180(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, a1, 7
+; RV32IM-NEXT: and a0, a5, a6
+; RV32IM-NEXT: sw a0, 192(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a4, s4, 256
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: andi a5, a3, 256
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, s7, 8
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 164(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, a1, 8
+; RV32IM-NEXT: and a0, a5, a6
+; RV32IM-NEXT: sw a0, 168(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 172(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a4, s4, 512
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: andi a5, a3, 512
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, s7, 9
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 176(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, a1, 9
+; RV32IM-NEXT: and a0, a5, a6
+; RV32IM-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a4, s4, 1024
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: andi a5, a3, 1024
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, s7, 10
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 188(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, a1, 10
+; RV32IM-NEXT: and a0, a5, a6
+; RV32IM-NEXT: sw a0, 200(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a4, a6
+; RV32IM-NEXT: sw a0, 204(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, a1, s6
+; RV32IM-NEXT: mul a5, a1, a2
; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: sw a4, 156(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a4, t5, s0
-; RV32IM-NEXT: sw a4, 160(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, t3
-; RV32IM-NEXT: xor a4, s1, a4
-; RV32IM-NEXT: sw a4, 152(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, t6
-; RV32IM-NEXT: xor a4, t4, a4
; RV32IM-NEXT: sw a4, 148(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, t2
-; RV32IM-NEXT: mul a5, a0, s4
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: sw a4, 144(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, t0
-; RV32IM-NEXT: mul a5, a0, a1
+; RV32IM-NEXT: mul a4, a1, s2
+; RV32IM-NEXT: mul a5, a1, t5
; RV32IM-NEXT: xor a4, a4, a5
; RV32IM-NEXT: sw a4, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, a6
-; RV32IM-NEXT: mul a5, a0, a7
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: sw a4, 132(sp) # 4-byte Folded Spill
; RV32IM-NEXT: li a4, 1
-; RV32IM-NEXT: slli s6, a4, 11
-; RV32IM-NEXT: andi a4, a3, 1024
-; RV32IM-NEXT: mul a1, a0, a4
-; RV32IM-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a2, 1024
-; RV32IM-NEXT: mul a1, t1, a4
-; RV32IM-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a4
-; RV32IM-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a0, a4, 11
+; RV32IM-NEXT: andi a4, s4, 1
+; RV32IM-NEXT: seqz a5, a4
+; RV32IM-NEXT: andi a4, a3, 1
+; RV32IM-NEXT: seqz ra, a4
+; RV32IM-NEXT: addi t1, a5, -1
+; RV32IM-NEXT: addi ra, ra, -1
+; RV32IM-NEXT: sw ra, 52(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui a4, 1
-; RV32IM-NEXT: lui a5, 2
-; RV32IM-NEXT: lui a1, 32
-; RV32IM-NEXT: lui s3, 64
-; RV32IM-NEXT: lui t2, 512
-; RV32IM-NEXT: lui t3, 1024
-; RV32IM-NEXT: lui t4, 2048
-; RV32IM-NEXT: lui t5, 4096
-; RV32IM-NEXT: lui t6, 32768
-; RV32IM-NEXT: lui s0, 65536
-; RV32IM-NEXT: lui s1, 131072
-; RV32IM-NEXT: lui s2, 262144
-; RV32IM-NEXT: lui s4, 524288
+; RV32IM-NEXT: lui a6, 2
+; RV32IM-NEXT: lui a2, 4
+; RV32IM-NEXT: lui a5, 8
+; RV32IM-NEXT: lui t2, 64
+; RV32IM-NEXT: lui t3, 128
+; RV32IM-NEXT: lui t4, 256
+; RV32IM-NEXT: lui t5, 512
+; RV32IM-NEXT: lui t6, 1024
+; RV32IM-NEXT: lui s0, 8192
+; RV32IM-NEXT: lui s1, 16384
+; RV32IM-NEXT: lui s2, 32768
+; RV32IM-NEXT: lui s3, 65536
+; RV32IM-NEXT: lui s5, 131072
+; RV32IM-NEXT: lui s6, 262144
+; RV32IM-NEXT: lui s8, 524288
; RV32IM-NEXT: and a4, a3, a4
-; RV32IM-NEXT: lui s7, 1
-; RV32IM-NEXT: and a5, a3, a5
-; RV32IM-NEXT: lui s9, 2
-; RV32IM-NEXT: lui t0, 16
-; RV32IM-NEXT: and a6, a3, t0
-; RV32IM-NEXT: and a7, a3, a1
-; RV32IM-NEXT: lui s8, 32
-; RV32IM-NEXT: and a1, a3, s3
-; RV32IM-NEXT: lui s10, 64
+; RV32IM-NEXT: lui s10, 1
+; RV32IM-NEXT: and a6, a3, a6
+; RV32IM-NEXT: lui s11, 2
+; RV32IM-NEXT: and a7, a3, a2
+; RV32IM-NEXT: and t0, a3, a5
; RV32IM-NEXT: and t2, a3, t2
-; RV32IM-NEXT: lui s11, 512
; RV32IM-NEXT: and t3, a3, t3
-; RV32IM-NEXT: lui ra, 1024
; RV32IM-NEXT: and t4, a3, t4
; RV32IM-NEXT: and t5, a3, t5
; RV32IM-NEXT: and t6, a3, t6
; RV32IM-NEXT: and s0, a3, s0
; RV32IM-NEXT: and s1, a3, s1
-; RV32IM-NEXT: and s3, a3, s2
-; RV32IM-NEXT: and s5, a3, s4
-; RV32IM-NEXT: and s2, a3, s6
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: mul a3, a0, a5
-; RV32IM-NEXT: sw a3, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, a6
-; RV32IM-NEXT: sw a3, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, a7
-; RV32IM-NEXT: sw a3, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t2
-; RV32IM-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t3
-; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t4
-; RV32IM-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t5
-; RV32IM-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t6
-; RV32IM-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, s0
-; RV32IM-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, s1
-; RV32IM-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, s3
-; RV32IM-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, s5
-; RV32IM-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a2, s7
-; RV32IM-NEXT: mul t5, t1, a3
-; RV32IM-NEXT: mul a1, a0, a3
-; RV32IM-NEXT: sw a1, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a2, s9
-; RV32IM-NEXT: mul a1, t1, a3
-; RV32IM-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a3
-; RV32IM-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a2, t0
-; RV32IM-NEXT: mul t3, t1, a3
-; RV32IM-NEXT: mul s9, a0, a3
-; RV32IM-NEXT: and a3, a2, s8
-; RV32IM-NEXT: mul a1, t1, a3
-; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a3
-; RV32IM-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a2, s10
-; RV32IM-NEXT: mul a1, t1, a3
-; RV32IM-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a3
-; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a2, s11
-; RV32IM-NEXT: mul a7, t1, a3
-; RV32IM-NEXT: mul s4, a0, a3
-; RV32IM-NEXT: and a6, a2, ra
-; RV32IM-NEXT: mul s11, t1, a6
-; RV32IM-NEXT: mul a1, a0, a6
-; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 2048
-; RV32IM-NEXT: and a6, a2, a1
-; RV32IM-NEXT: mul a1, t1, a6
-; RV32IM-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a6
-; RV32IM-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 4096
-; RV32IM-NEXT: and a6, a2, a1
-; RV32IM-NEXT: mul a1, t1, a6
-; RV32IM-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a6
-; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 32768
-; RV32IM-NEXT: and a6, a2, a1
-; RV32IM-NEXT: mul a5, t1, a6
-; RV32IM-NEXT: mul s1, a0, a6
-; RV32IM-NEXT: lui a1, 65536
-; RV32IM-NEXT: and a6, a2, a1
-; RV32IM-NEXT: mul s7, t1, a6
-; RV32IM-NEXT: mul s8, a0, a6
-; RV32IM-NEXT: lui a1, 131072
-; RV32IM-NEXT: and t6, a2, a1
-; RV32IM-NEXT: mul s10, t1, t6
-; RV32IM-NEXT: mul ra, a0, t6
-; RV32IM-NEXT: lui a1, 262144
-; RV32IM-NEXT: and s5, a2, a1
-; RV32IM-NEXT: mul a1, t1, s5
-; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s5, a0, s5
-; RV32IM-NEXT: lui a1, 524288
-; RV32IM-NEXT: and a1, a2, a1
-; RV32IM-NEXT: mul a3, t1, a1
-; RV32IM-NEXT: sw a3, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a1, a2, s6
-; RV32IM-NEXT: mul t1, t1, a1
-; RV32IM-NEXT: mul s2, a0, s2
-; RV32IM-NEXT: mul t6, a0, a1
-; RV32IM-NEXT: slli a1, a0, 24
-; RV32IM-NEXT: lw a3, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a0, a0, a3
-; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or s3, a1, a0
-; RV32IM-NEXT: slli a1, a2, 24
-; RV32IM-NEXT: and a2, a2, a3
-; RV32IM-NEXT: slli a2, a2, 8
-; RV32IM-NEXT: or t2, a1, a2
-; RV32IM-NEXT: lw a0, 204(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, a0, a1
-; RV32IM-NEXT: lw a0, 196(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a0, a1
-; RV32IM-NEXT: xor t1, t1, t5
-; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, a0, t3
-; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a0, a7
-; RV32IM-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, a0, a5
-; RV32IM-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 176(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, a0, a1
-; RV32IM-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a5, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a0, a5
-; RV32IM-NEXT: xor s2, s2, a4
-; RV32IM-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a0, a1
-; RV32IM-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a0, a2
-; RV32IM-NEXT: lw a0, 156(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a0, a3
-; RV32IM-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 152(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a0, a4
-; RV32IM-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s0, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, s0
-; RV32IM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, t6, s0
-; RV32IM-NEXT: lw s0, 144(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s9, s0, s9
-; RV32IM-NEXT: lw s0, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s4, s0, s4
-; RV32IM-NEXT: lw s0, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s0, s1
-; RV32IM-NEXT: lw s0, 212(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or s0, s3, s0
-; RV32IM-NEXT: lw s3, 208(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or t2, t2, s3
-; RV32IM-NEXT: xor a6, t4, a6
-; RV32IM-NEXT: lw t4, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, t4
-; RV32IM-NEXT: lw t4, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t3, t4
-; RV32IM-NEXT: xor a7, a7, s11
-; RV32IM-NEXT: xor t0, t0, s7
-; RV32IM-NEXT: xor a5, t5, a5
-; RV32IM-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, s2, t4
-; RV32IM-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, t5
-; RV32IM-NEXT: lw t5, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t5
-; RV32IM-NEXT: lw t5, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t5
-; RV32IM-NEXT: xor a0, a4, a0
-; RV32IM-NEXT: lw a4, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, t6, a4
-; RV32IM-NEXT: lw t5, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, s9, t5
-; RV32IM-NEXT: lw t6, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, s4, t6
-; RV32IM-NEXT: xor s1, s1, s8
-; RV32IM-NEXT: xor a6, a6, t1
-; RV32IM-NEXT: lw t1, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t3, t1
-; RV32IM-NEXT: lw t3, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, t3
-; RV32IM-NEXT: xor t0, t0, s10
-; RV32IM-NEXT: xor a5, a5, t4
-; RV32IM-NEXT: lw t3, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, t3
-; RV32IM-NEXT: lw t3, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t3
-; RV32IM-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t3
-; RV32IM-NEXT: xor a0, a0, a4
-; RV32IM-NEXT: lw a4, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, t5, a4
-; RV32IM-NEXT: lw t3, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t6, t3
-; RV32IM-NEXT: xor t4, s1, ra
-; RV32IM-NEXT: xor a6, a6, t1
-; RV32IM-NEXT: lw t1, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, t1
-; RV32IM-NEXT: lw t1, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, t1
-; RV32IM-NEXT: xor a5, a5, a1
-; RV32IM-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a1
-; RV32IM-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a1
-; RV32IM-NEXT: xor a0, a0, a4
-; RV32IM-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, t3, a1
-; RV32IM-NEXT: xor t1, t4, s5
-; RV32IM-NEXT: lui a1, 61681
-; RV32IM-NEXT: addi t5, a1, -241
-; RV32IM-NEXT: srli t3, s0, 4
-; RV32IM-NEXT: and s0, s0, t5
-; RV32IM-NEXT: and t3, t3, t5
-; RV32IM-NEXT: slli s0, s0, 4
-; RV32IM-NEXT: or t3, t3, s0
-; RV32IM-NEXT: srli t4, t2, 4
-; RV32IM-NEXT: and t2, t2, t5
-; RV32IM-NEXT: and t4, t4, t5
-; RV32IM-NEXT: slli t2, t2, 4
-; RV32IM-NEXT: or t2, t4, t2
-; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, t0, a1
-; RV32IM-NEXT: xor a5, a5, a2
-; RV32IM-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a1
-; RV32IM-NEXT: xor a0, a0, a4
-; RV32IM-NEXT: sw a0, 212(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, t1, a0
-; RV32IM-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a4, a6, a7
-; RV32IM-NEXT: xor a3, a5, a3
-; RV32IM-NEXT: xor a3, a3, a4
-; RV32IM-NEXT: sw a3, 204(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a4, 209715
-; RV32IM-NEXT: addi t4, a4, 819
-; RV32IM-NEXT: srli a5, t3, 2
-; RV32IM-NEXT: and a6, t3, t4
-; RV32IM-NEXT: and a5, a5, t4
-; RV32IM-NEXT: slli a6, a6, 2
-; RV32IM-NEXT: or a7, a5, a6
-; RV32IM-NEXT: srli a5, t2, 2
-; RV32IM-NEXT: and a6, t2, t4
-; RV32IM-NEXT: and a5, a5, t4
-; RV32IM-NEXT: slli a6, a6, 2
-; RV32IM-NEXT: or t0, a5, a6
-; RV32IM-NEXT: lui t6, 349525
-; RV32IM-NEXT: addi t6, t6, 1365
-; RV32IM-NEXT: srli t1, a7, 1
-; RV32IM-NEXT: and a7, a7, t6
-; RV32IM-NEXT: and t1, t1, t6
-; RV32IM-NEXT: slli a7, a7, 1
-; RV32IM-NEXT: or a7, t1, a7
-; RV32IM-NEXT: srli t1, t0, 1
-; RV32IM-NEXT: and t0, t0, t6
-; RV32IM-NEXT: and t1, t1, t6
-; RV32IM-NEXT: slli t0, t0, 1
-; RV32IM-NEXT: or t0, t1, t0
-; RV32IM-NEXT: and s0, t0, s6
-; RV32IM-NEXT: lui a0, 1
-; RV32IM-NEXT: and s2, t0, a0
-; RV32IM-NEXT: lui a0, 2
-; RV32IM-NEXT: and s3, t0, a0
+; RV32IM-NEXT: and s2, a3, s2
+; RV32IM-NEXT: and s3, a3, s3
+; RV32IM-NEXT: and s5, a3, s5
+; RV32IM-NEXT: and s6, a3, s6
+; RV32IM-NEXT: and s8, a3, s8
+; RV32IM-NEXT: and s9, a3, a0
+; RV32IM-NEXT: mv a2, a0
+; RV32IM-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a4
+; RV32IM-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a6
+; RV32IM-NEXT: sw a0, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a7
+; RV32IM-NEXT: sw a0, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, t0
+; RV32IM-NEXT: sw a0, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, t2
+; RV32IM-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, t3
+; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, t4
+; RV32IM-NEXT: sw a0, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, t5
+; RV32IM-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, t6
+; RV32IM-NEXT: sw a0, 152(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul ra, a1, s0
+; RV32IM-NEXT: mul a0, a1, s1
+; RV32IM-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, s2
+; RV32IM-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, s3
+; RV32IM-NEXT: sw a0, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, s5
+; RV32IM-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, s6
+; RV32IM-NEXT: sw a0, 156(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, s8
+; RV32IM-NEXT: sw a0, 160(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, s4, s10
+; RV32IM-NEXT: mul t5, s7, a3
+; RV32IM-NEXT: mul a0, a1, a3
+; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, s4, s11
+; RV32IM-NEXT: mul a0, s7, a3
+; RV32IM-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a3
+; RV32IM-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui a0, 4
-; RV32IM-NEXT: and s5, t0, a0
-; RV32IM-NEXT: lui a0, 8
-; RV32IM-NEXT: and s6, t0, a0
-; RV32IM-NEXT: lui a0, 16
-; RV32IM-NEXT: and s10, t0, a0
-; RV32IM-NEXT: lui a0, 32
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, s4, a0
+; RV32IM-NEXT: mul a0, s7, a3
+; RV32IM-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a3
+; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, s4, a5
+; RV32IM-NEXT: mul a0, s7, a3
+; RV32IM-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a3
+; RV32IM-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui a0, 64
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 192(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, s4, a0
+; RV32IM-NEXT: mul t3, s7, a3
+; RV32IM-NEXT: mul s5, a1, a3
; RV32IM-NEXT: lui a0, 128
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 188(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, s4, a0
+; RV32IM-NEXT: mul a0, s7, a3
+; RV32IM-NEXT: sw a0, 0(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a3
+; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui a0, 256
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, s4, a0
+; RV32IM-NEXT: mul a0, s7, a3
+; RV32IM-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a3
+; RV32IM-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui a0, 512
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 180(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, s4, a0
+; RV32IM-NEXT: mul a0, s7, a3
+; RV32IM-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a3
+; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui a0, 1024
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 176(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 2048
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 172(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 4096
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 168(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, s4, a0
+; RV32IM-NEXT: mul a0, s7, a3
+; RV32IM-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a3
+; RV32IM-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui a0, 8192
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 164(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, s4, a0
+; RV32IM-NEXT: mul a7, s7, a3
+; RV32IM-NEXT: mul s2, a1, a3
; RV32IM-NEXT: lui a0, 16384
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 160(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, s4, a0
+; RV32IM-NEXT: mul s8, s7, t2
+; RV32IM-NEXT: mul s10, a1, t2
; RV32IM-NEXT: lui a0, 32768
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 156(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, s4, a0
+; RV32IM-NEXT: mul s11, s7, t6
+; RV32IM-NEXT: mul a0, a1, t6
+; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui a0, 65536
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 152(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s6, s4, a0
+; RV32IM-NEXT: mul a0, s7, s6
+; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul s6, a1, s6
; RV32IM-NEXT: lui a0, 131072
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, s4, a0
+; RV32IM-NEXT: mul a3, s7, a0
+; RV32IM-NEXT: sw a3, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui a0, 262144
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, s4, a0
+; RV32IM-NEXT: mul a3, s7, a0
+; RV32IM-NEXT: sw a3, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui a0, 524288
-; RV32IM-NEXT: and a0, t0, a0
-; RV32IM-NEXT: sw a0, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, t0, 2
-; RV32IM-NEXT: andi a1, t0, 1
-; RV32IM-NEXT: andi a2, t0, 4
-; RV32IM-NEXT: andi a3, t0, 8
-; RV32IM-NEXT: andi a4, t0, 16
-; RV32IM-NEXT: andi a5, t0, 32
-; RV32IM-NEXT: andi a6, t0, 64
-; RV32IM-NEXT: andi t1, t0, 128
-; RV32IM-NEXT: andi t2, t0, 256
-; RV32IM-NEXT: andi t3, t0, 512
-; RV32IM-NEXT: andi t0, t0, 1024
-; RV32IM-NEXT: mul a0, a7, a0
-; RV32IM-NEXT: sw a0, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul ra, a7, a1
-; RV32IM-NEXT: mul s11, a7, a2
-; RV32IM-NEXT: mul s8, a7, a3
-; RV32IM-NEXT: mul s7, a7, a4
-; RV32IM-NEXT: mul s4, a7, a5
-; RV32IM-NEXT: mul a0, a7, a6
-; RV32IM-NEXT: sw a0, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a7, t1
-; RV32IM-NEXT: sw a0, 200(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s1, a7, t2
-; RV32IM-NEXT: mul t3, a7, t3
-; RV32IM-NEXT: mul a0, a7, t0
-; RV32IM-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a7, s0
-; RV32IM-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a7, s2
-; RV32IM-NEXT: sw a0, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t1, a7, s3
-; RV32IM-NEXT: mul a6, a7, s5
-; RV32IM-NEXT: mul s2, a7, s6
-; RV32IM-NEXT: mul s10, a7, s10
-; RV32IM-NEXT: lw a0, 196(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a7, a0
-; RV32IM-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a7, a0
-; RV32IM-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a7, a0
-; RV32IM-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a7, a0
+; RV32IM-NEXT: and a0, s4, a0
+; RV32IM-NEXT: mul a3, s7, a0
+; RV32IM-NEXT: sw a3, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, s4, a2
+; RV32IM-NEXT: and a2, t1, s7
+; RV32IM-NEXT: mul s7, s7, a0
+; RV32IM-NEXT: lw a3, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a3, a3, a1
+; RV32IM-NEXT: mul s9, a1, s9
+; RV32IM-NEXT: and t1, t1, a1
+; RV32IM-NEXT: mul s1, a1, a0
+; RV32IM-NEXT: slli a0, a1, 24
+; RV32IM-NEXT: lw a4, 312(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a1, a1, a4
+; RV32IM-NEXT: slli a1, a1, 8
+; RV32IM-NEXT: or s3, a0, a1
+; RV32IM-NEXT: slli a1, s4, 24
+; RV32IM-NEXT: and s4, s4, a4
+; RV32IM-NEXT: slli s4, s4, 8
+; RV32IM-NEXT: or t6, a1, s4
+; RV32IM-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, a2, a0
+; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, a0, a1
+; RV32IM-NEXT: lw a0, 228(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a0, a1
; RV32IM-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a5, a7, a0
-; RV32IM-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t2, a7, a0
-; RV32IM-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s6, a7, a0
-; RV32IM-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a7, a0
-; RV32IM-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a7, a0
-; RV32IM-NEXT: lw a4, 160(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a7, a4
-; RV32IM-NEXT: lw t0, 156(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t0, a7, t0
-; RV32IM-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s0, a7, s0
-; RV32IM-NEXT: lw s3, 148(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s3, a7, s3
-; RV32IM-NEXT: lw s5, 144(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s5, a7, s5
-; RV32IM-NEXT: lw s9, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a7, a7, s9
-; RV32IM-NEXT: lw s9, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor ra, ra, s9
-; RV32IM-NEXT: xor s8, s11, s8
-; RV32IM-NEXT: xor s4, s7, s4
-; RV32IM-NEXT: xor t3, s1, t3
-; RV32IM-NEXT: xor a6, t1, a6
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: xor a1, ra, s8
-; RV32IM-NEXT: lw a3, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, s4, a3
-; RV32IM-NEXT: lw t1, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t3, t1
-; RV32IM-NEXT: xor a6, a6, s2
-; RV32IM-NEXT: xor a2, a2, a5
-; RV32IM-NEXT: xor a0, a0, a4
+; RV32IM-NEXT: lw a1, 164(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, a0, a1
+; RV32IM-NEXT: xor t5, s7, t5
+; RV32IM-NEXT: lw a0, 300(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a0, t3
+; RV32IM-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a0, a7
+; RV32IM-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a3, a0
+; RV32IM-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, a0, a1
+; RV32IM-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 216(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a0, a3
+; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a4, 168(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a0, a4
+; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s9, s9, a0
+; RV32IM-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a0, a5
+; RV32IM-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, a0, ra
+; RV32IM-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, t1, a0
+; RV32IM-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 220(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: lw a2, 208(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 172(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, s0
+; RV32IM-NEXT: lw s0, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s1, s0
+; RV32IM-NEXT: lw s0, 148(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s0, s5
+; RV32IM-NEXT: lw s0, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s2, s0, s2
+; RV32IM-NEXT: lw s0, 308(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or s0, s3, s0
+; RV32IM-NEXT: lw s3, 304(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or t6, t6, s3
+; RV32IM-NEXT: xor t4, t4, s4
+; RV32IM-NEXT: lw s3, 224(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, t2, s3
+; RV32IM-NEXT: lw s3, 176(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, s3
+; RV32IM-NEXT: lw s3, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, t5, s3
+; RV32IM-NEXT: lw s3, 0(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, t3, s3
+; RV32IM-NEXT: xor a7, a7, s8
+; RV32IM-NEXT: xor a6, a6, s7
+; RV32IM-NEXT: lw s3, 232(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, s3
+; RV32IM-NEXT: lw s3, 184(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, s3
+; RV32IM-NEXT: lw s3, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, s9, s3
+; RV32IM-NEXT: lw s4, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, s4
+; RV32IM-NEXT: lw s4, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, ra, s4
+; RV32IM-NEXT: xor a0, t1, a0
+; RV32IM-NEXT: lw t1, 240(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, t1
+; RV32IM-NEXT: lw t1, 196(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, t1
+; RV32IM-NEXT: lw t1, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, s1, t1
+; RV32IM-NEXT: lw s1, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s5, s1
+; RV32IM-NEXT: xor s2, s2, s10
+; RV32IM-NEXT: xor t2, t4, t2
+; RV32IM-NEXT: lw t4, 188(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, t4
+; RV32IM-NEXT: lw t4, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, t5, t4
+; RV32IM-NEXT: lw t5, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, t3, t5
+; RV32IM-NEXT: xor a7, a7, s11
+; RV32IM-NEXT: xor a3, a6, a3
+; RV32IM-NEXT: lw a6, 200(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a6
+; RV32IM-NEXT: lw a6, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, s3, a6
+; RV32IM-NEXT: lw t5, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, t5
+; RV32IM-NEXT: lw t5, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, s4, t5
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, t1, a2
+; RV32IM-NEXT: lw t1, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, s1, t1
+; RV32IM-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s2, s1
+; RV32IM-NEXT: xor t0, t2, t0
+; RV32IM-NEXT: lw t2, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, t4, t2
+; RV32IM-NEXT: lw t4, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, t3, t4
+; RV32IM-NEXT: lw t4, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, t4
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: lw a4, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a6, a4
+; RV32IM-NEXT: lw a6, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: lw a6, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, t5, a6
+; RV32IM-NEXT: xor a1, a0, a1
+; RV32IM-NEXT: lw a0, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a0
+; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, t1, a0
+; RV32IM-NEXT: xor t4, s1, s6
+; RV32IM-NEXT: lui a0, 61681
+; RV32IM-NEXT: addi a0, a0, -241
+; RV32IM-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli t5, s0, 4
+; RV32IM-NEXT: and s0, s0, a0
+; RV32IM-NEXT: and t5, t5, a0
+; RV32IM-NEXT: slli s0, s0, 4
+; RV32IM-NEXT: or t5, t5, s0
+; RV32IM-NEXT: srli s0, t6, 4
+; RV32IM-NEXT: and t6, t6, a0
+; RV32IM-NEXT: and s0, s0, a0
+; RV32IM-NEXT: slli t6, t6, 4
+; RV32IM-NEXT: or t6, s0, t6
+; RV32IM-NEXT: xor t0, t0, t2
+; RV32IM-NEXT: lw a0, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, t3, a0
+; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, a0
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: lw a4, 152(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a5, a4
+; RV32IM-NEXT: lw a0, 144(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a6, a0
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: lw a0, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, t1, a0
+; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, t4, a0
+; RV32IM-NEXT: xor t0, t0, t2
+; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, a0
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: lw a4, 156(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a5, a4
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a6, a0
+; RV32IM-NEXT: xor a5, t0, a7
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: sw a1, 304(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a1, 209715
+; RV32IM-NEXT: addi a1, a1, 819
+; RV32IM-NEXT: sw a1, 296(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli a4, t5, 2
+; RV32IM-NEXT: and a6, t5, a1
+; RV32IM-NEXT: and a4, a4, a1
+; RV32IM-NEXT: slli a6, a6, 2
+; RV32IM-NEXT: or a4, a4, a6
+; RV32IM-NEXT: srli a6, t6, 2
+; RV32IM-NEXT: and a7, t6, a1
+; RV32IM-NEXT: and a6, a6, a1
+; RV32IM-NEXT: slli a7, a7, 2
+; RV32IM-NEXT: or a7, a6, a7
+; RV32IM-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a0
+; RV32IM-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a0
+; RV32IM-NEXT: xor a3, a3, a5
+; RV32IM-NEXT: sw a3, 300(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 349525
+; RV32IM-NEXT: addi a0, a0, 1365
+; RV32IM-NEXT: sw a0, 292(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli t0, a4, 1
+; RV32IM-NEXT: and a4, a4, a0
+; RV32IM-NEXT: and t0, t0, a0
+; RV32IM-NEXT: slli a4, a4, 1
+; RV32IM-NEXT: or a4, t0, a4
+; RV32IM-NEXT: srli t0, a7, 1
+; RV32IM-NEXT: and a7, a7, a0
+; RV32IM-NEXT: and t0, t0, a0
+; RV32IM-NEXT: slli a7, a7, 1
+; RV32IM-NEXT: or a7, t0, a7
+; RV32IM-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and t0, a7, a0
+; RV32IM-NEXT: lui a0, 1
+; RV32IM-NEXT: and t1, a7, a0
+; RV32IM-NEXT: mul t0, a4, t0
+; RV32IM-NEXT: mul t1, a4, t1
+; RV32IM-NEXT: xor a0, t0, t1
+; RV32IM-NEXT: sw a0, 288(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 64
+; RV32IM-NEXT: and t1, a7, a0
+; RV32IM-NEXT: lui a0, 128
+; RV32IM-NEXT: and t2, a7, a0
+; RV32IM-NEXT: mul t1, a4, t1
+; RV32IM-NEXT: mul t2, a4, t2
+; RV32IM-NEXT: xor a0, t1, t2
+; RV32IM-NEXT: sw a0, 280(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 4096
+; RV32IM-NEXT: and t2, a7, a0
+; RV32IM-NEXT: lui a0, 8192
+; RV32IM-NEXT: and t3, a7, a0
+; RV32IM-NEXT: mul t2, a4, t2
+; RV32IM-NEXT: mul t3, a4, t3
+; RV32IM-NEXT: xor t2, t2, t3
+; RV32IM-NEXT: andi t3, a7, 2
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: slli t4, a4, 1
+; RV32IM-NEXT: and t3, t3, t4
+; RV32IM-NEXT: andi t4, a7, 4
+; RV32IM-NEXT: seqz t4, t4
+; RV32IM-NEXT: addi t4, t4, -1
+; RV32IM-NEXT: slli t5, a4, 2
+; RV32IM-NEXT: and t4, t4, t5
+; RV32IM-NEXT: andi t5, a7, 8
+; RV32IM-NEXT: seqz t5, t5
+; RV32IM-NEXT: addi t5, t5, -1
+; RV32IM-NEXT: slli t6, a4, 3
+; RV32IM-NEXT: and t5, t5, t6
+; RV32IM-NEXT: andi t6, a7, 16
+; RV32IM-NEXT: seqz t6, t6
+; RV32IM-NEXT: addi t6, t6, -1
+; RV32IM-NEXT: slli s0, a4, 4
+; RV32IM-NEXT: and t6, t6, s0
+; RV32IM-NEXT: andi s0, a7, 32
+; RV32IM-NEXT: seqz s0, s0
+; RV32IM-NEXT: addi s0, s0, -1
+; RV32IM-NEXT: slli s1, a4, 5
+; RV32IM-NEXT: and s1, s0, s1
+; RV32IM-NEXT: andi s0, a7, 64
+; RV32IM-NEXT: seqz s0, s0
+; RV32IM-NEXT: addi s0, s0, -1
+; RV32IM-NEXT: slli s2, a4, 6
+; RV32IM-NEXT: and s0, s0, s2
+; RV32IM-NEXT: andi s2, a7, 128
+; RV32IM-NEXT: seqz s2, s2
+; RV32IM-NEXT: addi s2, s2, -1
+; RV32IM-NEXT: slli s3, a4, 7
+; RV32IM-NEXT: and s2, s2, s3
+; RV32IM-NEXT: andi s3, a7, 256
+; RV32IM-NEXT: seqz s3, s3
+; RV32IM-NEXT: addi s3, s3, -1
+; RV32IM-NEXT: slli s4, a4, 8
+; RV32IM-NEXT: and s3, s3, s4
+; RV32IM-NEXT: andi s4, a7, 512
+; RV32IM-NEXT: seqz s4, s4
+; RV32IM-NEXT: addi s4, s4, -1
+; RV32IM-NEXT: slli s5, a4, 9
+; RV32IM-NEXT: and s4, s4, s5
+; RV32IM-NEXT: lui a0, 2
+; RV32IM-NEXT: and s5, a7, a0
+; RV32IM-NEXT: lui a0, 4
+; RV32IM-NEXT: and s6, a7, a0
+; RV32IM-NEXT: lui a0, 8
+; RV32IM-NEXT: and s7, a7, a0
+; RV32IM-NEXT: lui a0, 16
+; RV32IM-NEXT: and s8, a7, a0
+; RV32IM-NEXT: lui a0, 32
+; RV32IM-NEXT: and s9, a7, a0
+; RV32IM-NEXT: lui a0, 256
+; RV32IM-NEXT: and s10, a7, a0
+; RV32IM-NEXT: lui a0, 512
+; RV32IM-NEXT: and s11, a7, a0
+; RV32IM-NEXT: lui a0, 1024
+; RV32IM-NEXT: and ra, a7, a0
+; RV32IM-NEXT: lui a0, 2048
+; RV32IM-NEXT: and a0, a7, a0
+; RV32IM-NEXT: lui a1, 16384
+; RV32IM-NEXT: and a1, a7, a1
+; RV32IM-NEXT: lui a2, 32768
+; RV32IM-NEXT: and a2, a7, a2
+; RV32IM-NEXT: lui a3, 65536
+; RV32IM-NEXT: and a3, a7, a3
+; RV32IM-NEXT: lui a5, 131072
+; RV32IM-NEXT: and a5, a7, a5
+; RV32IM-NEXT: lui a6, 262144
+; RV32IM-NEXT: and a6, a7, a6
+; RV32IM-NEXT: lui t0, 524288
+; RV32IM-NEXT: and t0, a7, t0
+; RV32IM-NEXT: andi t1, a7, 1
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: mul s5, a4, s5
+; RV32IM-NEXT: mul s6, a4, s6
+; RV32IM-NEXT: mul s7, a4, s7
+; RV32IM-NEXT: mul s8, a4, s8
+; RV32IM-NEXT: mul s9, a4, s9
+; RV32IM-NEXT: mul s10, a4, s10
+; RV32IM-NEXT: mul s11, a4, s11
+; RV32IM-NEXT: mul ra, a4, ra
+; RV32IM-NEXT: mul a0, a4, a0
+; RV32IM-NEXT: mul a1, a4, a1
+; RV32IM-NEXT: mul a2, a4, a2
+; RV32IM-NEXT: mul a3, a4, a3
+; RV32IM-NEXT: mul a5, a4, a5
+; RV32IM-NEXT: mul a6, a4, a6
+; RV32IM-NEXT: mul t0, a4, t0
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: and t1, t1, a4
+; RV32IM-NEXT: slli a4, a4, 10
+; RV32IM-NEXT: andi a7, a7, 1024
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: and a4, a7, a4
+; RV32IM-NEXT: lw a7, 288(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, s5
+; RV32IM-NEXT: lw s5, 280(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s5, s10
+; RV32IM-NEXT: xor a1, t2, a1
+; RV32IM-NEXT: xor t1, t1, t3
+; RV32IM-NEXT: xor t2, t4, t5
+; RV32IM-NEXT: xor t3, t6, s1
+; RV32IM-NEXT: xor t4, s2, s3
+; RV32IM-NEXT: xor a7, a7, s6
+; RV32IM-NEXT: xor t5, s5, s11
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: xor a2, t1, t2
+; RV32IM-NEXT: xor t1, t3, s0
+; RV32IM-NEXT: xor t2, t4, s4
+; RV32IM-NEXT: xor a7, a7, s7
+; RV32IM-NEXT: xor t3, t5, ra
; RV32IM-NEXT: xor a1, a1, a3
-; RV32IM-NEXT: lw a3, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, t1, a3
-; RV32IM-NEXT: xor a4, a6, s10
-; RV32IM-NEXT: xor a2, a2, t2
-; RV32IM-NEXT: xor a0, a0, t0
-; RV32IM-NEXT: lw a5, 200(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, t1
+; RV32IM-NEXT: xor a3, t2, a4
+; RV32IM-NEXT: xor a4, a7, s8
+; RV32IM-NEXT: xor a0, t3, a0
; RV32IM-NEXT: xor a1, a1, a5
-; RV32IM-NEXT: lw a5, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a5
-; RV32IM-NEXT: lw a5, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: xor a2, a2, s6
-; RV32IM-NEXT: xor a0, a0, s0
-; RV32IM-NEXT: lw a5, 196(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: xor a0, a0, s3
-; RV32IM-NEXT: xor a3, a1, a3
-; RV32IM-NEXT: xor a3, a3, a4
-; RV32IM-NEXT: xor a0, a0, s5
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: xor a0, a0, a7
-; RV32IM-NEXT: lw a4, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a3, a2, a4
-; RV32IM-NEXT: xor a0, a2, a0
-; RV32IM-NEXT: srli a2, a2, 8
-; RV32IM-NEXT: and a2, a2, a4
-; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: xor a2, a2, a3
+; RV32IM-NEXT: xor a3, a4, s9
+; RV32IM-NEXT: xor a1, a1, a6
+; RV32IM-NEXT: xor a1, a1, t0
+; RV32IM-NEXT: xor a3, a2, a3
+; RV32IM-NEXT: xor a0, a3, a0
+; RV32IM-NEXT: lw a4, 312(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a3, a0, a4
+; RV32IM-NEXT: xor a1, a0, a1
+; RV32IM-NEXT: srli a0, a0, 8
+; RV32IM-NEXT: and a0, a0, a4
+; RV32IM-NEXT: slli a2, a2, 24
; RV32IM-NEXT: slli a3, a3, 8
-; RV32IM-NEXT: or a1, a1, a3
-; RV32IM-NEXT: srli a0, a0, 24
+; RV32IM-NEXT: or a2, a2, a3
+; RV32IM-NEXT: srli a1, a1, 24
+; RV32IM-NEXT: or a0, a0, a1
; RV32IM-NEXT: or a0, a2, a0
-; RV32IM-NEXT: or a0, a1, a0
; RV32IM-NEXT: srli a1, a0, 4
-; RV32IM-NEXT: and a0, a0, t5
-; RV32IM-NEXT: and a1, a1, t5
+; RV32IM-NEXT: lw a2, 308(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: and a1, a1, a2
; RV32IM-NEXT: slli a0, a0, 4
; RV32IM-NEXT: or a0, a1, a0
; RV32IM-NEXT: srli a1, a0, 2
-; RV32IM-NEXT: and a0, a0, t4
-; RV32IM-NEXT: and a1, a1, t4
+; RV32IM-NEXT: lw a2, 296(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: and a1, a1, a2
; RV32IM-NEXT: slli a0, a0, 2
; RV32IM-NEXT: or a0, a1, a0
; RV32IM-NEXT: lui a1, 349525
; RV32IM-NEXT: addi a1, a1, 1364
-; RV32IM-NEXT: and a2, a0, t6
+; RV32IM-NEXT: lw a2, 292(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a2, a0, a2
; RV32IM-NEXT: srli a0, a0, 1
; RV32IM-NEXT: and a0, a0, a1
; RV32IM-NEXT: slli a2, a2, 1
; RV32IM-NEXT: or a0, a0, a2
; RV32IM-NEXT: srli a0, a0, 1
-; RV32IM-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 300(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a0, a1
-; RV32IM-NEXT: lw a0, 212(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 208(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a2
-; RV32IM-NEXT: lw ra, 268(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s0, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s2, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s3, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s4, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s5, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s8, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s9, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s10, 224(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s11, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 272
+; RV32IM-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 304(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a2, a0
+; RV32IM-NEXT: lw ra, 364(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 356(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s2, 352(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s3, 348(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s4, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 340(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s6, 336(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s7, 332(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s8, 328(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s9, 324(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 320(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s11, 316(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 368
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: clmul_i64:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: addi sp, sp, -448
-; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi t2, a1, 2
-; RV64IM-NEXT: andi t4, a1, 1
-; RV64IM-NEXT: andi a6, a1, 4
-; RV64IM-NEXT: andi t0, a1, 8
-; RV64IM-NEXT: andi a5, a1, 16
-; RV64IM-NEXT: andi a7, a1, 32
-; RV64IM-NEXT: andi a3, a1, 64
-; RV64IM-NEXT: andi t1, a1, 128
-; RV64IM-NEXT: andi t3, a1, 256
-; RV64IM-NEXT: andi a4, a1, 512
-; RV64IM-NEXT: li a2, 1
-; RV64IM-NEXT: lui s7, 1
-; RV64IM-NEXT: lui t6, 2
-; RV64IM-NEXT: lui s0, 4
-; RV64IM-NEXT: lui s1, 8
-; RV64IM-NEXT: lui s2, 16
-; RV64IM-NEXT: lui s3, 32
-; RV64IM-NEXT: lui s4, 64
-; RV64IM-NEXT: lui s5, 128
-; RV64IM-NEXT: lui s6, 256
-; RV64IM-NEXT: lui s8, 512
-; RV64IM-NEXT: lui s9, 1024
-; RV64IM-NEXT: lui s10, 2048
-; RV64IM-NEXT: lui s11, 4096
-; RV64IM-NEXT: lui ra, 8192
-; RV64IM-NEXT: lui t5, 16384
-; RV64IM-NEXT: mul t2, a0, t2
-; RV64IM-NEXT: mul t4, a0, t4
-; RV64IM-NEXT: xor t2, t4, t2
-; RV64IM-NEXT: lui t4, 32768
-; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: addi sp, sp, -528
+; RV64IM-NEXT: sd ra, 520(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s0, 512(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 504(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s2, 496(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s3, 488(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s4, 480(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s5, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s6, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s7, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s8, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s9, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s10, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s11, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t5, a0, 1
+; RV64IM-NEXT: andi s6, a1, 2
+; RV64IM-NEXT: slli s7, a0, 2
+; RV64IM-NEXT: andi s10, a1, 4
+; RV64IM-NEXT: slli s2, a0, 3
+; RV64IM-NEXT: andi s8, a1, 8
+; RV64IM-NEXT: slli s3, a0, 4
+; RV64IM-NEXT: andi s9, a1, 16
+; RV64IM-NEXT: slli t3, a0, 5
+; RV64IM-NEXT: andi s5, a1, 32
+; RV64IM-NEXT: slli t1, a0, 6
+; RV64IM-NEXT: andi s4, a1, 64
+; RV64IM-NEXT: slli a7, a0, 7
+; RV64IM-NEXT: andi t6, a1, 128
+; RV64IM-NEXT: slli a6, a0, 8
+; RV64IM-NEXT: andi t4, a1, 256
+; RV64IM-NEXT: slli a5, a0, 9
+; RV64IM-NEXT: andi t2, a1, 512
+; RV64IM-NEXT: slli a4, a0, 10
+; RV64IM-NEXT: andi t0, a1, 1024
+; RV64IM-NEXT: lui a2, 1
+; RV64IM-NEXT: lui s0, 2
+; RV64IM-NEXT: lui s1, 4
+; RV64IM-NEXT: lui a3, 8
+; RV64IM-NEXT: lui s11, 16
+; RV64IM-NEXT: lui ra, 32
+; RV64IM-NEXT: seqz s6, s6
+; RV64IM-NEXT: addi s6, s6, -1
+; RV64IM-NEXT: and t5, s6, t5
+; RV64IM-NEXT: sd t5, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui t5, 64
+; RV64IM-NEXT: seqz s6, s10
+; RV64IM-NEXT: addi s6, s6, -1
+; RV64IM-NEXT: and s6, s6, s7
+; RV64IM-NEXT: sd s6, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s6, 128
+; RV64IM-NEXT: seqz s7, s8
+; RV64IM-NEXT: addi s7, s7, -1
+; RV64IM-NEXT: and s2, s7, s2
+; RV64IM-NEXT: sd s2, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s2, 256
+; RV64IM-NEXT: seqz s7, s9
+; RV64IM-NEXT: addi s7, s7, -1
+; RV64IM-NEXT: and s3, s7, s3
+; RV64IM-NEXT: sd s3, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s3, 512
+; RV64IM-NEXT: seqz s5, s5
+; RV64IM-NEXT: addi s5, s5, -1
+; RV64IM-NEXT: and t3, s5, t3
+; RV64IM-NEXT: sd t3, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui t3, 1024
+; RV64IM-NEXT: seqz s4, s4
+; RV64IM-NEXT: addi s4, s4, -1
+; RV64IM-NEXT: and t1, s4, t1
+; RV64IM-NEXT: sd t1, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui t1, 2048
+; RV64IM-NEXT: seqz t6, t6
+; RV64IM-NEXT: addi t6, t6, -1
+; RV64IM-NEXT: and a7, t6, a7
+; RV64IM-NEXT: sd a7, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a7, 4096
+; RV64IM-NEXT: seqz t4, t4
+; RV64IM-NEXT: addi t4, t4, -1
+; RV64IM-NEXT: and a6, t4, a6
+; RV64IM-NEXT: sd a6, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a6, 8192
+; RV64IM-NEXT: seqz t2, t2
+; RV64IM-NEXT: addi t2, t2, -1
+; RV64IM-NEXT: and a5, t2, a5
+; RV64IM-NEXT: sd a5, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 16384
+; RV64IM-NEXT: seqz t0, t0
+; RV64IM-NEXT: addi t0, t0, -1
+; RV64IM-NEXT: and a4, t0, a4
+; RV64IM-NEXT: sd a4, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a4, 32768
+; RV64IM-NEXT: and t0, a1, s11
+; RV64IM-NEXT: and t2, a1, ra
; RV64IM-NEXT: mul t0, a0, t0
-; RV64IM-NEXT: xor a6, a6, t0
+; RV64IM-NEXT: mul t2, a0, t2
+; RV64IM-NEXT: xor t0, t0, t2
+; RV64IM-NEXT: sd t0, 336(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui t0, 65536
-; RV64IM-NEXT: mul a5, a0, a5
+; RV64IM-NEXT: and t1, a1, t1
+; RV64IM-NEXT: and a7, a1, a7
+; RV64IM-NEXT: mul t1, a0, t1
; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: xor a5, a5, a7
+; RV64IM-NEXT: xor a7, t1, a7
+; RV64IM-NEXT: sd a7, 328(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a7, 131072
-; RV64IM-NEXT: mul t1, a0, t1
-; RV64IM-NEXT: mul t3, a0, t3
-; RV64IM-NEXT: xor t1, t1, t3
-; RV64IM-NEXT: lui t3, 262144
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: xor a6, t2, a6
-; RV64IM-NEXT: sd a6, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a6, a2, 11
-; RV64IM-NEXT: sd a6, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s7, a1, s7
-; RV64IM-NEXT: and a6, a1, t6
-; RV64IM-NEXT: sd a6, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a1, a2
+; RV64IM-NEXT: sd a2, 320(sp) # 8-byte Folded Spill
; RV64IM-NEXT: and s0, a1, s0
+; RV64IM-NEXT: sd s0, 312(sp) # 8-byte Folded Spill
; RV64IM-NEXT: and s1, a1, s1
-; RV64IM-NEXT: and s2, a1, s2
-; RV64IM-NEXT: and s3, a1, s3
-; RV64IM-NEXT: and a6, a1, s4
-; RV64IM-NEXT: sd a6, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a6, a1, s5
-; RV64IM-NEXT: and t2, a1, s6
-; RV64IM-NEXT: and s8, a1, s8
-; RV64IM-NEXT: and t6, a1, s9
-; RV64IM-NEXT: sd t6, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t6, a1, s10
-; RV64IM-NEXT: sd t6, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t6, a1, s11
-; RV64IM-NEXT: sd t6, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t6, a1, ra
-; RV64IM-NEXT: and t5, a1, t5
-; RV64IM-NEXT: and t4, a1, t4
-; RV64IM-NEXT: sd t4, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t0, a1, t0
-; RV64IM-NEXT: sd t0, 240(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a7, a1, a7
-; RV64IM-NEXT: sd a7, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a7, a1, t3
-; RV64IM-NEXT: sd a7, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: xor a3, a5, a3
-; RV64IM-NEXT: sd a3, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t4, a2, 32
-; RV64IM-NEXT: xor a3, t1, a4
-; RV64IM-NEXT: sd a3, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s4, a2, 33
-; RV64IM-NEXT: mul a3, a0, s0
-; RV64IM-NEXT: mul a4, a0, s1
-; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: sd a3, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s0, a2, 34
-; RV64IM-NEXT: mul a3, a0, a6
-; RV64IM-NEXT: mul a4, a0, t2
-; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: sd a3, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s1, a2, 35
-; RV64IM-NEXT: mul a3, a0, t6
-; RV64IM-NEXT: mul a4, a0, t5
-; RV64IM-NEXT: xor a3, a3, a4
+; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a3, a1, a3
; RV64IM-NEXT: sd a3, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t5, a2, 36
-; RV64IM-NEXT: slli t6, a2, 37
-; RV64IM-NEXT: slli s5, a2, 38
-; RV64IM-NEXT: slli s6, a2, 39
-; RV64IM-NEXT: slli s9, a2, 40
-; RV64IM-NEXT: slli s10, a2, 41
-; RV64IM-NEXT: slli s11, a2, 42
-; RV64IM-NEXT: slli ra, a2, 43
-; RV64IM-NEXT: slli a3, a2, 44
-; RV64IM-NEXT: sd a3, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a3, a2, 45
+; RV64IM-NEXT: and a2, a1, t5
+; RV64IM-NEXT: sd a2, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a1, s6
+; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a1, s2
+; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a1, s3
+; RV64IM-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a1, t3
+; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a1, a6
+; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a5, a1, a5
+; RV64IM-NEXT: sd a5, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a4, a1, a4
+; RV64IM-NEXT: sd a4, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a1, t0
+; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a1, a7
+; RV64IM-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 262144
+; RV64IM-NEXT: and a2, a1, a2
+; RV64IM-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: li a2, 1
+; RV64IM-NEXT: slli t4, a2, 11
+; RV64IM-NEXT: slli s8, a2, 32
+; RV64IM-NEXT: slli a3, a2, 33
; RV64IM-NEXT: sd a3, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a3, a2, 46
+; RV64IM-NEXT: slli a3, a2, 34
; RV64IM-NEXT: sd a3, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a3, a2, 47
+; RV64IM-NEXT: slli s5, a2, 35
+; RV64IM-NEXT: slli a3, a2, 36
; RV64IM-NEXT: sd a3, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a3, a2, 48
+; RV64IM-NEXT: slli a3, a2, 37
; RV64IM-NEXT: sd a3, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a3, a2, 49
+; RV64IM-NEXT: slli a3, a2, 38
; RV64IM-NEXT: sd a3, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a3, a2, 50
+; RV64IM-NEXT: slli a3, a2, 39
; RV64IM-NEXT: sd a3, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a3, a2, 51
+; RV64IM-NEXT: slli t6, a2, 40
+; RV64IM-NEXT: slli s9, a2, 41
+; RV64IM-NEXT: slli s0, a2, 42
+; RV64IM-NEXT: slli a3, a2, 43
; RV64IM-NEXT: sd a3, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a3, a2, 52
+; RV64IM-NEXT: slli a3, a2, 44
; RV64IM-NEXT: sd a3, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a3, a2, 53
+; RV64IM-NEXT: slli a3, a2, 45
; RV64IM-NEXT: sd a3, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a3, a2, 54
-; RV64IM-NEXT: sd a3, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s6, a2, 46
+; RV64IM-NEXT: slli t5, a2, 47
+; RV64IM-NEXT: slli s2, a2, 48
+; RV64IM-NEXT: slli t3, a2, 49
+; RV64IM-NEXT: slli s11, a2, 50
+; RV64IM-NEXT: slli s10, a2, 51
+; RV64IM-NEXT: slli s7, a2, 52
+; RV64IM-NEXT: slli ra, a2, 53
+; RV64IM-NEXT: slli t2, a2, 54
; RV64IM-NEXT: slli t1, a2, 55
; RV64IM-NEXT: slli t0, a2, 56
; RV64IM-NEXT: slli a7, a2, 57
@@ -4133,1376 +4951,1644 @@ define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
; RV64IM-NEXT: slli a4, a2, 60
; RV64IM-NEXT: slli a3, a2, 61
; RV64IM-NEXT: slli a2, a2, 62
-; RV64IM-NEXT: ld t2, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t3, a1, t2
-; RV64IM-NEXT: and t2, a1, t4
-; RV64IM-NEXT: sd t2, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t4, a1, s4
+; RV64IM-NEXT: and t4, a1, t4
+; RV64IM-NEXT: and s8, a1, s8
+; RV64IM-NEXT: ld s4, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s1, a1, s4
+; RV64IM-NEXT: sd s1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld s4, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s4, a1, s4
+; RV64IM-NEXT: and s5, a1, s5
+; RV64IM-NEXT: ld s3, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s1, a1, s3
+; RV64IM-NEXT: sd s1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld s3, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s3, a1, s3
+; RV64IM-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s1, a1, s1
+; RV64IM-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld s1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s1, a1, s1
+; RV64IM-NEXT: and t6, a1, t6
+; RV64IM-NEXT: sd t6, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s9, a1, s9
; RV64IM-NEXT: and s0, a1, s0
; RV64IM-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s1, a1, s1
-; RV64IM-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t2, a1, t5
-; RV64IM-NEXT: sd t2, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s1, a1, t6
-; RV64IM-NEXT: and t2, a1, s5
-; RV64IM-NEXT: sd t2, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t2, a1, s6
-; RV64IM-NEXT: sd t2, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s4, a1, s9
-; RV64IM-NEXT: and s5, a1, s10
-; RV64IM-NEXT: and s6, a1, s11
-; RV64IM-NEXT: and t6, a1, ra
-; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: sd t2, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld t2, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: sd t2, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld t2, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s10, a1, t2
-; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s11, a1, t2
-; RV64IM-NEXT: ld t2, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and ra, a1, t2
-; RV64IM-NEXT: ld t2, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: sd t2, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld t2, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: sd t2, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: sd t2, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld t2, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: sd t2, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld t2, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: sd t2, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld t2, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t6, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t6, a1, t6
+; RV64IM-NEXT: sd t6, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld t6, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t6, a1, t6
+; RV64IM-NEXT: sd t6, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld t6, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t6, a1, t6
+; RV64IM-NEXT: sd t6, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t6, a1, s6
+; RV64IM-NEXT: sd t6, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t5, a1, t5
+; RV64IM-NEXT: sd t5, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t5, a1, s2
+; RV64IM-NEXT: sd t5, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t3, a1, t3
+; RV64IM-NEXT: sd t3, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s11, a1, s11
+; RV64IM-NEXT: and s2, a1, s10
+; RV64IM-NEXT: and t3, a1, s7
+; RV64IM-NEXT: sd t3, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t5, a1, ra
; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: sd t2, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd t2, 56(sp) # 8-byte Folded Spill
; RV64IM-NEXT: and t1, a1, t1
; RV64IM-NEXT: and t0, a1, t0
-; RV64IM-NEXT: sd t0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd t0, 48(sp) # 8-byte Folded Spill
; RV64IM-NEXT: and a7, a1, a7
-; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a7, 40(sp) # 8-byte Folded Spill
; RV64IM-NEXT: and a6, a1, a6
; RV64IM-NEXT: and a5, a1, a5
-; RV64IM-NEXT: sd a5, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a5, 32(sp) # 8-byte Folded Spill
; RV64IM-NEXT: and a4, a1, a4
-; RV64IM-NEXT: sd a4, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a4, 24(sp) # 8-byte Folded Spill
; RV64IM-NEXT: and a3, a1, a3
-; RV64IM-NEXT: sd a3, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
; RV64IM-NEXT: and a2, a1, a2
-; RV64IM-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a2, a1, 1024
+; RV64IM-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a1, 1
; RV64IM-NEXT: srliw a3, a1, 31
; RV64IM-NEXT: srli a1, a1, 63
-; RV64IM-NEXT: mul s9, a0, a2
+; RV64IM-NEXT: seqz a2, a2
; RV64IM-NEXT: slli a3, a3, 31
-; RV64IM-NEXT: slli a1, a1, 63
-; RV64IM-NEXT: mul s7, a0, s7
-; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: sd a2, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s2, a0, s2
-; RV64IM-NEXT: mul a2, a0, s3
-; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s0, a0, s8
-; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t2, a0, a2
-; RV64IM-NEXT: ld a2, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: sd a2, 0(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a2, a0, a3
-; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a4, a1, 63
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s10, a0, a1
+; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s7, a0, a1
+; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s0, a0, a1
+; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a5, a0, t3
+; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul t6, a0, a3
+; RV64IM-NEXT: mul a1, a0, a4
+; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t0, a2, a0
+; RV64IM-NEXT: mul t3, a0, t4
+; RV64IM-NEXT: mul t4, a0, s8
+; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s6, a0, a1
+; RV64IM-NEXT: mul ra, a0, s4
+; RV64IM-NEXT: mul a1, a0, s5
+; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a0, a1
-; RV64IM-NEXT: mul t4, a0, t4
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s3
+; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul s3, a0, s1
+; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s5, a0, a1
+; RV64IM-NEXT: mul s8, a0, s9
; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t5, a0, a1
+; RV64IM-NEXT: mul s4, a0, a1
; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, s1
-; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s1, a0, a1
-; RV64IM-NEXT: mul s4, a0, s4
-; RV64IM-NEXT: mul s5, a0, s5
-; RV64IM-NEXT: mul s6, a0, s6
-; RV64IM-NEXT: mul a1, a0, t6
-; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a0, a1
; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, a1
+; RV64IM-NEXT: mul t2, a0, s11
+; RV64IM-NEXT: mul s2, a0, s2
; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s1, a0, a1
+; RV64IM-NEXT: mul s9, a0, t5
+; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, s10
-; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s10, a0, s11
-; RV64IM-NEXT: mul s11, a0, ra
-; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul ra, a0, a1
-; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t0, a0, a1
-; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t3, a0, a1
-; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s3, a0, a1
-; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a1, a0, t1
-; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a2, a0, a1
+; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: mul a3, a0, a6
-; RV64IM-NEXT: ld a4, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: ld a6, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a6, a0, a6
-; RV64IM-NEXT: ld t1, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t1, a0, t1
-; RV64IM-NEXT: ld t6, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t6, a0, t6
-; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s8, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a0, s8
-; RV64IM-NEXT: ld s8, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s9, s8, s9
-; RV64IM-NEXT: xor a5, a5, s7
-; RV64IM-NEXT: ld s7, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s7, s2
-; RV64IM-NEXT: ld s7, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s7, s0
-; RV64IM-NEXT: ld s7, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, s7, t2
-; RV64IM-NEXT: xor a7, a7, t4
-; RV64IM-NEXT: xor t4, s1, s4
-; RV64IM-NEXT: xor s1, s10, s11
-; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: xor a0, a0, s9
-; RV64IM-NEXT: ld a2, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a5, a2
-; RV64IM-NEXT: ld a5, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, s2, a5
-; RV64IM-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s0, s2
-; RV64IM-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, s2
-; RV64IM-NEXT: xor a7, a7, t5
-; RV64IM-NEXT: xor t4, t4, s5
-; RV64IM-NEXT: xor t5, s1, ra
-; RV64IM-NEXT: xor a1, a1, a3
-; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: ld a2, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a5, a2
-; RV64IM-NEXT: ld a3, 208(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, s0, a3
-; RV64IM-NEXT: ld a5, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, t2, a5
-; RV64IM-NEXT: ld t2, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t2
-; RV64IM-NEXT: xor t2, t4, s6
-; RV64IM-NEXT: xor t0, t5, t0
+; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, a6
+; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a0, a1
+; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a6, a0, a1
+; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul t1, a0, a1
+; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul t5, a0, a1
+; RV64IM-NEXT: ld a0, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, a0
+; RV64IM-NEXT: ld a0, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: ld a2, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, s11
+; RV64IM-NEXT: xor t3, t3, s10
+; RV64IM-NEXT: ld s10, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s7, s10, s7
+; RV64IM-NEXT: ld s10, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s10, s0
+; RV64IM-NEXT: xor t4, t6, t4
+; RV64IM-NEXT: xor t6, s3, s5
+; RV64IM-NEXT: xor a5, a7, a5
+; RV64IM-NEXT: xor a3, a4, a3
+; RV64IM-NEXT: xor a0, t0, a0
+; RV64IM-NEXT: ld a4, 384(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a1, a4
-; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: ld a3, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a5, a3
-; RV64IM-NEXT: ld a4, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a4, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, a4
+; RV64IM-NEXT: ld a4, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, t3, a4
+; RV64IM-NEXT: ld a7, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, s7, a7
+; RV64IM-NEXT: ld t0, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, s0, t0
+; RV64IM-NEXT: xor t3, t4, s6
+; RV64IM-NEXT: xor t4, t6, s8
+; RV64IM-NEXT: xor a5, a5, t2
+; RV64IM-NEXT: xor a3, a3, a6
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: ld a4, 192(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a4, a7, a4
-; RV64IM-NEXT: ld a5, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, t2, a5
-; RV64IM-NEXT: xor a7, t0, t3
-; RV64IM-NEXT: xor a1, a1, a6
-; RV64IM-NEXT: xor a0, a0, a2
+; RV64IM-NEXT: ld a6, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, t0, a6
+; RV64IM-NEXT: xor a7, t3, ra
+; RV64IM-NEXT: xor t0, t4, s4
+; RV64IM-NEXT: xor a5, a5, s2
+; RV64IM-NEXT: xor a3, a3, t1
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: ld a3, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a4, a3
-; RV64IM-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a5, a4
-; RV64IM-NEXT: xor a5, a7, s3
-; RV64IM-NEXT: xor a1, a1, t1
-; RV64IM-NEXT: xor a0, a0, a2
+; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: ld a4, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a6, a4
+; RV64IM-NEXT: ld a6, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a7, a6
+; RV64IM-NEXT: ld a7, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, t0, a7
+; RV64IM-NEXT: xor a5, a5, s1
+; RV64IM-NEXT: xor a3, a3, t5
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: ld a4, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a6, a4
+; RV64IM-NEXT: ld a6, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a7, a6
+; RV64IM-NEXT: xor a5, a5, s9
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: ld a3, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a4, a3
-; RV64IM-NEXT: ld a4, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: ld a4, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a6, a4
+; RV64IM-NEXT: ld a6, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, a6
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a4, a5, a4
-; RV64IM-NEXT: xor a1, a1, t6
-; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: ld a3, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a4, a3
-; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, a1, a2
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: addi sp, sp, 448
+; RV64IM-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a3, a1
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld ra, 520(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s0, 512(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 504(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s2, 496(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s3, 488(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s4, 480(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s5, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s6, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s7, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s8, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s9, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s10, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 528
; RV64IM-NEXT: ret
;
; RV32IMZBS-LABEL: clmul_i64:
; RV32IMZBS: # %bb.0:
-; RV32IMZBS-NEXT: addi sp, sp, -272
-; RV32IMZBS-NEXT: sw ra, 268(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s0, 264(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s1, 260(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s2, 256(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s3, 252(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s4, 248(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s5, 244(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s6, 240(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s7, 236(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s8, 232(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s9, 228(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s10, 224(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s11, 220(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli s6, a0, 8
-; RV32IMZBS-NEXT: srli t0, a0, 24
-; RV32IMZBS-NEXT: srli t1, a2, 8
-; RV32IMZBS-NEXT: srli t2, a2, 24
-; RV32IMZBS-NEXT: bseti s11, zero, 11
-; RV32IMZBS-NEXT: andi t3, a2, 2
-; RV32IMZBS-NEXT: andi t5, a2, 1
-; RV32IMZBS-NEXT: andi t6, a2, 4
-; RV32IMZBS-NEXT: andi s0, a2, 8
-; RV32IMZBS-NEXT: andi s1, a2, 16
-; RV32IMZBS-NEXT: andi s2, a2, 32
-; RV32IMZBS-NEXT: andi t4, a2, 128
-; RV32IMZBS-NEXT: andi s3, a2, 256
-; RV32IMZBS-NEXT: andi a4, a3, 2
-; RV32IMZBS-NEXT: andi a5, a3, 1
-; RV32IMZBS-NEXT: andi a6, a3, 4
-; RV32IMZBS-NEXT: andi a7, a3, 8
-; RV32IMZBS-NEXT: mul s4, a1, t3
-; RV32IMZBS-NEXT: mul s5, a1, t5
-; RV32IMZBS-NEXT: mul s7, a1, t6
-; RV32IMZBS-NEXT: mul s9, a1, s0
-; RV32IMZBS-NEXT: mul s10, a1, s1
-; RV32IMZBS-NEXT: xor s8, s5, s4
-; RV32IMZBS-NEXT: mul s5, a1, s2
-; RV32IMZBS-NEXT: xor s4, s7, s9
-; RV32IMZBS-NEXT: mul s7, a1, t4
-; RV32IMZBS-NEXT: xor s5, s10, s5
-; RV32IMZBS-NEXT: mul s9, a1, s3
-; RV32IMZBS-NEXT: xor s9, s7, s9
-; RV32IMZBS-NEXT: andi s7, a3, 16
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: xor a4, a5, a4
-; RV32IMZBS-NEXT: sw a4, 176(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s10, a3, 32
-; RV32IMZBS-NEXT: mul a5, a0, a6
-; RV32IMZBS-NEXT: mul a6, a0, a7
-; RV32IMZBS-NEXT: xor a4, a5, a6
-; RV32IMZBS-NEXT: andi a7, a3, 128
-; RV32IMZBS-NEXT: mul a6, a0, s7
-; RV32IMZBS-NEXT: mul s7, a0, s10
-; RV32IMZBS-NEXT: xor a5, a6, s7
-; RV32IMZBS-NEXT: sw a5, 168(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s7, a3, 256
-; RV32IMZBS-NEXT: mul a7, a0, a7
-; RV32IMZBS-NEXT: mul s7, a0, s7
-; RV32IMZBS-NEXT: xor a5, a7, s7
-; RV32IMZBS-NEXT: mul a7, a0, t3
-; RV32IMZBS-NEXT: mul t3, a0, t5
-; RV32IMZBS-NEXT: xor a7, t3, a7
-; RV32IMZBS-NEXT: andi t5, a2, 64
-; RV32IMZBS-NEXT: mul t3, a0, t6
-; RV32IMZBS-NEXT: mul t6, a0, s0
-; RV32IMZBS-NEXT: xor s0, t3, t6
-; RV32IMZBS-NEXT: andi t6, a2, 512
-; RV32IMZBS-NEXT: mul t3, a0, s1
-; RV32IMZBS-NEXT: mul s1, a0, s2
-; RV32IMZBS-NEXT: xor t3, t3, s1
-; RV32IMZBS-NEXT: lui s10, 16
-; RV32IMZBS-NEXT: addi a6, s10, -256
-; RV32IMZBS-NEXT: mul t4, a0, t4
-; RV32IMZBS-NEXT: mul s1, a0, s3
-; RV32IMZBS-NEXT: xor s1, t4, s1
-; RV32IMZBS-NEXT: mul t4, a1, t5
-; RV32IMZBS-NEXT: mv s3, a6
-; RV32IMZBS-NEXT: sw a6, 216(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s2, s6, a6
-; RV32IMZBS-NEXT: or a6, s2, t0
-; RV32IMZBS-NEXT: sw a6, 208(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a1, t6
-; RV32IMZBS-NEXT: and t1, t1, s3
-; RV32IMZBS-NEXT: or a6, t1, t2
-; RV32IMZBS-NEXT: sw a6, 204(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s11, 212(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s2, a2, s11
-; RV32IMZBS-NEXT: xor a6, s8, s4
-; RV32IMZBS-NEXT: sw a6, 200(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui ra, 1
-; RV32IMZBS-NEXT: and s6, a2, ra
-; RV32IMZBS-NEXT: xor a6, s5, t4
-; RV32IMZBS-NEXT: sw a6, 196(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t1, a1, s2
-; RV32IMZBS-NEXT: xor a6, s9, t0
-; RV32IMZBS-NEXT: sw a6, 192(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a1, s6
-; RV32IMZBS-NEXT: xor a6, t1, t0
-; RV32IMZBS-NEXT: sw a6, 188(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s9, 8
-; RV32IMZBS-NEXT: and s3, a2, s9
-; RV32IMZBS-NEXT: and s4, a2, s10
-; RV32IMZBS-NEXT: mul t0, a1, s3
-; RV32IMZBS-NEXT: mul t1, a1, s4
-; RV32IMZBS-NEXT: xor a6, t0, t1
-; RV32IMZBS-NEXT: sw a6, 184(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 256
-; RV32IMZBS-NEXT: lui t1, 512
-; RV32IMZBS-NEXT: and t2, a2, t0
-; RV32IMZBS-NEXT: lui s10, 256
-; RV32IMZBS-NEXT: and t4, a2, t1
-; RV32IMZBS-NEXT: mul t0, a1, t2
-; RV32IMZBS-NEXT: mul t1, a1, t4
-; RV32IMZBS-NEXT: xor a6, t0, t1
-; RV32IMZBS-NEXT: sw a6, 180(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 16384
-; RV32IMZBS-NEXT: lui t1, 32768
-; RV32IMZBS-NEXT: and t0, a2, t0
-; RV32IMZBS-NEXT: lui a6, 16384
-; RV32IMZBS-NEXT: and t1, a2, t1
-; RV32IMZBS-NEXT: lui s7, 32768
-; RV32IMZBS-NEXT: mul s5, a1, t0
-; RV32IMZBS-NEXT: mul s8, a1, t1
-; RV32IMZBS-NEXT: xor s5, s5, s8
-; RV32IMZBS-NEXT: sw s5, 172(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s5, 176(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, s5, a4
+; RV32IMZBS-NEXT: addi sp, sp, -400
+; RV32IMZBS-NEXT: sw ra, 396(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s0, 392(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s1, 388(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s2, 384(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s3, 380(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s4, 376(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s5, 372(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s6, 368(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s7, 364(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s8, 360(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s9, 356(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s10, 352(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s11, 348(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: mv t5, a3
+; RV32IMZBS-NEXT: mv s1, a2
+; RV32IMZBS-NEXT: mv t0, a1
+; RV32IMZBS-NEXT: mv s10, a0
+; RV32IMZBS-NEXT: srli a0, a0, 8
+; RV32IMZBS-NEXT: lui a1, 16
+; RV32IMZBS-NEXT: srli a3, s10, 24
+; RV32IMZBS-NEXT: srli a4, a2, 8
+; RV32IMZBS-NEXT: srli a6, a2, 24
+; RV32IMZBS-NEXT: slli s6, t0, 1
+; RV32IMZBS-NEXT: andi s8, a2, 2
+; RV32IMZBS-NEXT: slli s2, t0, 2
+; RV32IMZBS-NEXT: andi s4, a2, 4
+; RV32IMZBS-NEXT: slli s0, t0, 3
+; RV32IMZBS-NEXT: andi s3, a2, 8
+; RV32IMZBS-NEXT: slli t4, t0, 4
+; RV32IMZBS-NEXT: andi a2, a2, 16
+; RV32IMZBS-NEXT: slli t1, t0, 5
+; RV32IMZBS-NEXT: andi t6, s1, 32
+; RV32IMZBS-NEXT: slli a7, t0, 6
+; RV32IMZBS-NEXT: andi t3, s1, 64
+; RV32IMZBS-NEXT: slli s5, t0, 7
+; RV32IMZBS-NEXT: andi t2, s1, 128
+; RV32IMZBS-NEXT: slli s9, s10, 1
+; RV32IMZBS-NEXT: andi s7, t5, 2
+; RV32IMZBS-NEXT: slli s11, s10, 2
+; RV32IMZBS-NEXT: andi ra, t5, 4
+; RV32IMZBS-NEXT: slli a5, s10, 3
+; RV32IMZBS-NEXT: addi a1, a1, -256
+; RV32IMZBS-NEXT: sw a1, 344(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: or a0, a0, a3
+; RV32IMZBS-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a3, t5, 8
+; RV32IMZBS-NEXT: and a0, a4, a1
+; RV32IMZBS-NEXT: or a0, a0, a6
+; RV32IMZBS-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a0, s10, 4
+; RV32IMZBS-NEXT: seqz a6, s8
+; RV32IMZBS-NEXT: seqz a4, s7
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and s6, a6, s6
+; RV32IMZBS-NEXT: and s8, a4, s9
+; RV32IMZBS-NEXT: and a1, a6, s9
+; RV32IMZBS-NEXT: sw a1, 332(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, t5, 16
+; RV32IMZBS-NEXT: seqz a6, s4
+; RV32IMZBS-NEXT: seqz s4, ra
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: and a1, a6, s2
+; RV32IMZBS-NEXT: sw a1, 312(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, s4, s11
+; RV32IMZBS-NEXT: sw a1, 324(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a6, s11
+; RV32IMZBS-NEXT: sw a1, 328(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s10, 5
+; RV32IMZBS-NEXT: seqz s2, s3
+; RV32IMZBS-NEXT: seqz a3, a3
+; RV32IMZBS-NEXT: addi s2, s2, -1
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a1, s2, s0
+; RV32IMZBS-NEXT: sw a1, 296(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 308(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, s2, a5
+; RV32IMZBS-NEXT: sw a1, 320(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a3, t5, 32
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and s2, a2, t4
+; RV32IMZBS-NEXT: and a4, a4, a0
+; RV32IMZBS-NEXT: sw a4, 288(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a0, s10, 6
+; RV32IMZBS-NEXT: seqz a2, t6
+; RV32IMZBS-NEXT: seqz a3, a3
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a1, a2, t1
+; RV32IMZBS-NEXT: sw a1, 276(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a3, a6
+; RV32IMZBS-NEXT: sw a1, 280(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a2, a6
+; RV32IMZBS-NEXT: sw a1, 284(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a2, t5, 64
+; RV32IMZBS-NEXT: seqz a3, t3
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a1, a3, a7
+; RV32IMZBS-NEXT: sw a1, 292(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a0
+; RV32IMZBS-NEXT: sw a2, 304(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a3, a0
+; RV32IMZBS-NEXT: sw a0, 316(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t5, 128
+; RV32IMZBS-NEXT: seqz a2, t2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: and a1, a2, s5
+; RV32IMZBS-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a1, s10, 7
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a2, a1
+; RV32IMZBS-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s1, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t5, 256
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 8
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 228(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, s10, 8
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s1, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t5, 512
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 9
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 244(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, s10, 9
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s1, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t5, 1024
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 10
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 256(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, s10, 10
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a1, s1
+; RV32IMZBS-NEXT: bexti a0, a1, 11
+; RV32IMZBS-NEXT: addi a2, a0, -1
+; RV32IMZBS-NEXT: not a0, t5
+; RV32IMZBS-NEXT: bexti a3, a0, 11
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 11
+; RV32IMZBS-NEXT: and a4, a2, a4
; RV32IMZBS-NEXT: sw a4, 176(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a4, a3, 64
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: lw s5, 168(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, s5, a4
+; RV32IMZBS-NEXT: slli a4, s10, 11
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 188(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 196(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 12
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 12
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 12
+; RV32IMZBS-NEXT: and a4, a2, a4
; RV32IMZBS-NEXT: sw a4, 168(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a4, a3, 512
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: xor a4, a5, a4
-; RV32IMZBS-NEXT: sw a4, 164(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a3, s11
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: and a5, a3, ra
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: sw a4, 160(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a3, s9
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: lui a5, 16
-; RV32IMZBS-NEXT: and a5, a3, a5
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: sw a4, 156(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a3, s10
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: lui a5, 512
-; RV32IMZBS-NEXT: and a5, a3, a5
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: sw a4, 152(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a3, a6
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: and a5, a3, s7
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: sw a4, 144(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a4, a7, s0
-; RV32IMZBS-NEXT: sw a4, 148(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, t5
-; RV32IMZBS-NEXT: xor a4, t3, a4
-; RV32IMZBS-NEXT: sw a4, 140(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, t6
-; RV32IMZBS-NEXT: xor a4, s1, a4
-; RV32IMZBS-NEXT: sw a4, 136(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s2
-; RV32IMZBS-NEXT: mul a5, a0, s6
-; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: slli a4, s10, 12
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 172(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 180(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 13
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 13
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 13
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 184(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 13
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 192(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 200(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 14
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 14
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 14
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 204(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 14
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 208(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 216(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 15
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 15
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 212(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 15
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 220(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 224(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 16
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 16
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 16
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 104(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 16
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 116(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 124(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 17
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 17
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 17
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 96(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 17
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 100(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 108(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 18
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 18
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 18
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 112(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 18
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 120(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 128(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 19
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 19
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 19
+; RV32IMZBS-NEXT: and a4, a2, a4
; RV32IMZBS-NEXT: sw a4, 132(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s3
-; RV32IMZBS-NEXT: mul a5, a0, s4
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: sw a4, 128(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, t2
-; RV32IMZBS-NEXT: mul a5, a0, t4
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: sw a4, 124(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, t0
-; RV32IMZBS-NEXT: mul a5, a0, t1
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: sw a4, 120(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a4, a3, 1024
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 116(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t6, 2
-; RV32IMZBS-NEXT: lui s0, 4
-; RV32IMZBS-NEXT: lui t3, 32
-; RV32IMZBS-NEXT: lui t4, 64
-; RV32IMZBS-NEXT: lui t5, 128
-; RV32IMZBS-NEXT: lui t0, 1024
-; RV32IMZBS-NEXT: lui s1, 2048
-; RV32IMZBS-NEXT: lui t1, 4096
-; RV32IMZBS-NEXT: lui t2, 8192
-; RV32IMZBS-NEXT: lui a4, 65536
-; RV32IMZBS-NEXT: lui a5, 131072
-; RV32IMZBS-NEXT: lui a6, 262144
-; RV32IMZBS-NEXT: lui a7, 524288
-; RV32IMZBS-NEXT: and s8, a3, t6
+; RV32IMZBS-NEXT: slli a4, s10, 19
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 136(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 144(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 20
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 20
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 20
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 140(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 20
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 148(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 152(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 21
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 21
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 21
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 156(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 21
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 160(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 164(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 22
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 22
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 22
+; RV32IMZBS-NEXT: and s7, a2, a4
+; RV32IMZBS-NEXT: slli a4, s10, 22
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 40(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 44(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 23
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 23
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 23
+; RV32IMZBS-NEXT: and s5, a2, a4
+; RV32IMZBS-NEXT: slli s0, s10, 23
; RV32IMZBS-NEXT: and ra, a3, s0
-; RV32IMZBS-NEXT: and s3, a3, t3
-; RV32IMZBS-NEXT: and s4, a3, t4
-; RV32IMZBS-NEXT: and s10, a3, t5
-; RV32IMZBS-NEXT: and t0, a3, t0
-; RV32IMZBS-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 1024
-; RV32IMZBS-NEXT: and s1, a3, s1
-; RV32IMZBS-NEXT: sw s1, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s5, a3, t1
-; RV32IMZBS-NEXT: and s6, a3, t2
-; RV32IMZBS-NEXT: and s2, a3, a4
-; RV32IMZBS-NEXT: lui t4, 65536
-; RV32IMZBS-NEXT: and s7, a3, a5
-; RV32IMZBS-NEXT: lui t5, 131072
-; RV32IMZBS-NEXT: and s9, a3, a6
-; RV32IMZBS-NEXT: lui t6, 262144
-; RV32IMZBS-NEXT: and s11, a3, a7
-; RV32IMZBS-NEXT: lui s0, 524288
-; RV32IMZBS-NEXT: andi t3, a2, 1024
-; RV32IMZBS-NEXT: mul a3, a1, t3
-; RV32IMZBS-NEXT: sw a3, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, t3
-; RV32IMZBS-NEXT: sw a3, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 2
-; RV32IMZBS-NEXT: and a3, a2, a3
-; RV32IMZBS-NEXT: lui a4, 4
+; RV32IMZBS-NEXT: and a2, a2, s0
+; RV32IMZBS-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 25
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 25
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 25
; RV32IMZBS-NEXT: and a4, a2, a4
-; RV32IMZBS-NEXT: lui a5, 32
-; RV32IMZBS-NEXT: and a5, a2, a5
-; RV32IMZBS-NEXT: lui a6, 64
-; RV32IMZBS-NEXT: and t3, a2, a6
-; RV32IMZBS-NEXT: lui a6, 128
-; RV32IMZBS-NEXT: and a6, a2, a6
-; RV32IMZBS-NEXT: and a7, a2, t0
-; RV32IMZBS-NEXT: lui t0, 2048
-; RV32IMZBS-NEXT: and t0, a2, t0
-; RV32IMZBS-NEXT: and t1, a2, t1
-; RV32IMZBS-NEXT: and t2, a2, t2
-; RV32IMZBS-NEXT: and t4, a2, t4
-; RV32IMZBS-NEXT: and t5, a2, t5
-; RV32IMZBS-NEXT: and t6, a2, t6
-; RV32IMZBS-NEXT: and s0, a2, s0
-; RV32IMZBS-NEXT: mul s1, a1, a3
-; RV32IMZBS-NEXT: sw s1, 24(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, a4
-; RV32IMZBS-NEXT: sw s1, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, a5
-; RV32IMZBS-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, t3
-; RV32IMZBS-NEXT: sw s1, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, a6
-; RV32IMZBS-NEXT: sw s1, 104(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, a7
-; RV32IMZBS-NEXT: sw s1, 16(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, t0
-; RV32IMZBS-NEXT: sw s1, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, t1
-; RV32IMZBS-NEXT: sw s1, 100(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, t2
-; RV32IMZBS-NEXT: sw s1, 112(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, t4
-; RV32IMZBS-NEXT: sw s1, 12(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, t5
-; RV32IMZBS-NEXT: sw s1, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a1, t6
-; RV32IMZBS-NEXT: sw s1, 92(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a1, s0
-; RV32IMZBS-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, s8
-; RV32IMZBS-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, ra
-; RV32IMZBS-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s8, a0, s3
-; RV32IMZBS-NEXT: mul a1, a0, s4
-; RV32IMZBS-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, s10
-; RV32IMZBS-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s3, a0, a1
-; RV32IMZBS-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s10, a0, a1
-; RV32IMZBS-NEXT: mul a1, a0, s5
-; RV32IMZBS-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, s6
-; RV32IMZBS-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s5, a0, s2
-; RV32IMZBS-NEXT: mul s7, a0, s7
-; RV32IMZBS-NEXT: mul a1, a0, s9
-; RV32IMZBS-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, s11
-; RV32IMZBS-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s11, a0, a3
-; RV32IMZBS-NEXT: mul s6, a0, a4
-; RV32IMZBS-NEXT: mul s1, a0, a5
-; RV32IMZBS-NEXT: mul s4, a0, t3
-; RV32IMZBS-NEXT: mul a1, a0, a6
-; RV32IMZBS-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t3, a0, a7
-; RV32IMZBS-NEXT: mul s2, a0, t0
-; RV32IMZBS-NEXT: mul a1, a0, t1
-; RV32IMZBS-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, t2
-; RV32IMZBS-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t4, a0, t4
-; RV32IMZBS-NEXT: mul t5, a0, t5
-; RV32IMZBS-NEXT: mul s9, a0, t6
-; RV32IMZBS-NEXT: mul a1, a0, s0
-; RV32IMZBS-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli t6, a0, 24
-; RV32IMZBS-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, a0, a1
-; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or t2, t6, a0
-; RV32IMZBS-NEXT: slli t6, a2, 24
-; RV32IMZBS-NEXT: and a2, a2, a1
-; RV32IMZBS-NEXT: slli a2, a2, 8
-; RV32IMZBS-NEXT: or t1, t6, a2
-; RV32IMZBS-NEXT: lw a0, 200(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, a0, a1
-; RV32IMZBS-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s0, 52(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a4, 48(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 25
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 56(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 60(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 26
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 26
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 26
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 52(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 26
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 64(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 27
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 27
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 27
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 72(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 27
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 76(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 84(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 28
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 28
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 28
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 92(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s10, 28
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 88(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 80(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 29
+; RV32IMZBS-NEXT: addi a3, a2, -1
+; RV32IMZBS-NEXT: bexti a2, a0, 29
+; RV32IMZBS-NEXT: addi a4, a2, -1
+; RV32IMZBS-NEXT: slli a2, t0, 29
+; RV32IMZBS-NEXT: and t6, a3, a2
+; RV32IMZBS-NEXT: slli a7, s10, 29
+; RV32IMZBS-NEXT: and s4, a4, a7
+; RV32IMZBS-NEXT: and a2, a3, a7
+; RV32IMZBS-NEXT: sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a1, 30
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 30
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a7, t0, 30
+; RV32IMZBS-NEXT: and a6, a3, a7
+; RV32IMZBS-NEXT: slli a7, s10, 30
+; RV32IMZBS-NEXT: and t3, a4, a7
+; RV32IMZBS-NEXT: and a2, a3, a7
+; RV32IMZBS-NEXT: sw a2, 16(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: srli a7, s1, 31
+; RV32IMZBS-NEXT: seqz a7, a7
+; RV32IMZBS-NEXT: srli t2, t5, 31
+; RV32IMZBS-NEXT: seqz t2, t2
+; RV32IMZBS-NEXT: addi t4, a7, -1
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: slli a7, t0, 31
+; RV32IMZBS-NEXT: and a2, t4, a7
+; RV32IMZBS-NEXT: sw a2, 20(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s10, 31
+; RV32IMZBS-NEXT: and a2, t2, a5
+; RV32IMZBS-NEXT: sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, t4, a5
+; RV32IMZBS-NEXT: sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a5, t5, 1
+; RV32IMZBS-NEXT: andi t5, s1, 1
+; RV32IMZBS-NEXT: seqz t5, t5
+; RV32IMZBS-NEXT: bexti a1, a1, 24
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: bexti a0, a0, 24
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli t1, t0, 24
+; RV32IMZBS-NEXT: and t0, t5, t0
+; RV32IMZBS-NEXT: and a5, a5, s10
+; RV32IMZBS-NEXT: and t5, t5, s10
+; RV32IMZBS-NEXT: slli s9, s10, 24
+; RV32IMZBS-NEXT: lw a2, 344(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and s10, s10, a2
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli s10, s10, 8
+; RV32IMZBS-NEXT: and s11, a1, t1
+; RV32IMZBS-NEXT: and a0, a0, s9
+; RV32IMZBS-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a1, s9
+; RV32IMZBS-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or s9, s9, s10
+; RV32IMZBS-NEXT: slli s10, s1, 24
+; RV32IMZBS-NEXT: and s1, s1, a2
+; RV32IMZBS-NEXT: slli s1, s1, 8
+; RV32IMZBS-NEXT: or s3, s10, s1
+; RV32IMZBS-NEXT: xor s6, t0, s6
+; RV32IMZBS-NEXT: lw a0, 312(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 296(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, a0, a1
+; RV32IMZBS-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, s2, a0
+; RV32IMZBS-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 228(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor s0, a0, s0
-; RV32IMZBS-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a5, 24(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a0, a5
-; RV32IMZBS-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a0, a1
-; RV32IMZBS-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 168(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, a0, s1
+; RV32IMZBS-NEXT: lw a0, 104(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, a0, a1
+; RV32IMZBS-NEXT: xor s5, s7, s5
+; RV32IMZBS-NEXT: xor t4, t6, a6
+; RV32IMZBS-NEXT: xor t6, a5, s8
+; RV32IMZBS-NEXT: lw a0, 324(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 308(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, a0, a1
+; RV32IMZBS-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 280(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a0, a1
+; RV32IMZBS-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a7, a0, a1
-; RV32IMZBS-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 172(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t0, a0, a1
-; RV32IMZBS-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 168(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a0, a4
-; RV32IMZBS-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a3, 116(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a0, a3
-; RV32IMZBS-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a0, a1
-; RV32IMZBS-NEXT: lw a0, 156(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, a0, s8
-; RV32IMZBS-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, a0, s3
-; RV32IMZBS-NEXT: lw a0, 144(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s5, a0, s5
-; RV32IMZBS-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 140(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a0, a2
-; RV32IMZBS-NEXT: lw a0, 136(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw ra, 84(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, ra
-; RV32IMZBS-NEXT: lw ra, 132(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, ra, s11
-; RV32IMZBS-NEXT: lw ra, 128(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, ra, s1
-; RV32IMZBS-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, ra, t3
-; RV32IMZBS-NEXT: lw ra, 120(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, ra, t4
-; RV32IMZBS-NEXT: lw ra, 208(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or t2, t2, ra
-; RV32IMZBS-NEXT: lw ra, 204(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or t1, t1, ra
-; RV32IMZBS-NEXT: xor t6, t6, s0
-; RV32IMZBS-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, s0
-; RV32IMZBS-NEXT: lw s0, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, s0
-; RV32IMZBS-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, s0
-; RV32IMZBS-NEXT: lw s0, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, s0
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: lw a4, 36(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a4
-; RV32IMZBS-NEXT: lw a4, 28(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, s8, a4
-; RV32IMZBS-NEXT: xor s0, s3, s10
-; RV32IMZBS-NEXT: xor s3, s5, s7
-; RV32IMZBS-NEXT: xor a0, a2, a0
-; RV32IMZBS-NEXT: xor a2, s11, s6
-; RV32IMZBS-NEXT: xor s1, s1, s4
-; RV32IMZBS-NEXT: xor t3, t3, s2
-; RV32IMZBS-NEXT: xor t4, t4, t5
-; RV32IMZBS-NEXT: xor a5, t6, a5
-; RV32IMZBS-NEXT: lw t5, 104(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, t5
-; RV32IMZBS-NEXT: lw t5, 100(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, t5
+; RV32IMZBS-NEXT: lw a0, 116(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a0, a1
+; RV32IMZBS-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, a0, ra
+; RV32IMZBS-NEXT: xor s4, s4, t3
+; RV32IMZBS-NEXT: lw a0, 332(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, a0
+; RV32IMZBS-NEXT: lw a0, 328(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 320(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a0, a1
+; RV32IMZBS-NEXT: lw a0, 300(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 284(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 264(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 236(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: lw a2, 196(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 180(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a3
+; RV32IMZBS-NEXT: lw a3, 124(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 108(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a4
+; RV32IMZBS-NEXT: lw a4, 44(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 36(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a5, 28(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 16(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, s8
+; RV32IMZBS-NEXT: lw s8, 340(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: or s8, s9, s8
+; RV32IMZBS-NEXT: lw s9, 336(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: or s9, s3, s9
+; RV32IMZBS-NEXT: xor s3, s6, s10
+; RV32IMZBS-NEXT: lw s6, 292(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, t2, s6
+; RV32IMZBS-NEXT: lw t2, 244(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s0, t2
+; RV32IMZBS-NEXT: lw t2, 184(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s1, t2
+; RV32IMZBS-NEXT: lw t2, 112(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s2, t2
+; RV32IMZBS-NEXT: xor s5, s5, s11
+; RV32IMZBS-NEXT: lw t2, 20(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, t4, t2
+; RV32IMZBS-NEXT: xor t4, t6, s7
+; RV32IMZBS-NEXT: lw t6, 304(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t1, t6
+; RV32IMZBS-NEXT: lw t6, 248(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t6
+; RV32IMZBS-NEXT: lw t6, 192(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t6
+; RV32IMZBS-NEXT: lw t6, 120(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, a6, t6
+; RV32IMZBS-NEXT: lw a6, 8(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, ra, a6
+; RV32IMZBS-NEXT: lw a6, 24(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, s4, a6
+; RV32IMZBS-NEXT: xor t3, t5, t3
+; RV32IMZBS-NEXT: lw t5, 316(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, t5
+; RV32IMZBS-NEXT: lw t5, 260(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, t5
+; RV32IMZBS-NEXT: lw t5, 200(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, t5
+; RV32IMZBS-NEXT: lw t5, 128(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, t5
+; RV32IMZBS-NEXT: lw t5, 12(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t5
+; RV32IMZBS-NEXT: lw t5, 32(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, t5
+; RV32IMZBS-NEXT: sw a5, 340(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a5, s3, s6
+; RV32IMZBS-NEXT: lw t5, 256(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, s0, t5
+; RV32IMZBS-NEXT: lw s0, 204(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s1, s0
+; RV32IMZBS-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s2, s1
+; RV32IMZBS-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s5, s2
+; RV32IMZBS-NEXT: xor t1, t4, t1
+; RV32IMZBS-NEXT: lw t4, 268(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t4
+; RV32IMZBS-NEXT: lw t4, 208(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t4
+; RV32IMZBS-NEXT: lw t4, 136(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t6, t4
+; RV32IMZBS-NEXT: lw t6, 56(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, s7, t6
+; RV32IMZBS-NEXT: xor a0, t3, a0
+; RV32IMZBS-NEXT: lw t3, 272(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, t3
+; RV32IMZBS-NEXT: lw t3, 216(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, t3
+; RV32IMZBS-NEXT: lw t3, 144(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, t3
+; RV32IMZBS-NEXT: lw t3, 60(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t3
+; RV32IMZBS-NEXT: xor a5, a5, t5
+; RV32IMZBS-NEXT: lw t3, 212(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, s0, t3
+; RV32IMZBS-NEXT: lw t5, 140(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, s1, t5
+; RV32IMZBS-NEXT: lw s0, 52(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s2, s0
+; RV32IMZBS-NEXT: xor a7, t1, a7
+; RV32IMZBS-NEXT: lw t1, 220(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t1
+; RV32IMZBS-NEXT: lw t1, 148(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t4, t1
+; RV32IMZBS-NEXT: lw t4, 64(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t6, t4
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a1
+; RV32IMZBS-NEXT: lw a1, 152(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, a3, a1
+; RV32IMZBS-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a1
+; RV32IMZBS-NEXT: lui a1, 61681
+; RV32IMZBS-NEXT: addi a1, a1, -241
+; RV32IMZBS-NEXT: srli a3, s8, 4
+; RV32IMZBS-NEXT: and s1, s8, a1
+; RV32IMZBS-NEXT: and a3, a3, a1
+; RV32IMZBS-NEXT: slli s1, s1, 4
+; RV32IMZBS-NEXT: or s1, a3, s1
+; RV32IMZBS-NEXT: srli a3, s9, 4
+; RV32IMZBS-NEXT: and s2, s9, a1
+; RV32IMZBS-NEXT: and a3, a3, a1
+; RV32IMZBS-NEXT: slli s2, s2, 4
+; RV32IMZBS-NEXT: or s2, a3, s2
+; RV32IMZBS-NEXT: xor t3, a5, t3
+; RV32IMZBS-NEXT: lui a3, 209715
+; RV32IMZBS-NEXT: lw a5, 156(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, a5
+; RV32IMZBS-NEXT: lui s3, 349525
+; RV32IMZBS-NEXT: addi a5, a3, 819
+; RV32IMZBS-NEXT: lw a3, 72(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s0, a3
+; RV32IMZBS-NEXT: addi a3, s3, 1365
+; RV32IMZBS-NEXT: xor a7, a7, t0
+; RV32IMZBS-NEXT: lw t0, 160(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t1, t0
+; RV32IMZBS-NEXT: lw t1, 76(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t4, t1
+; RV32IMZBS-NEXT: xor a0, a0, a2
+; RV32IMZBS-NEXT: lw a2, 164(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, t6, a2
+; RV32IMZBS-NEXT: lw t4, 84(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t4
+; RV32IMZBS-NEXT: srli t4, s1, 2
+; RV32IMZBS-NEXT: and s1, s1, a5
+; RV32IMZBS-NEXT: srli t6, s2, 2
+; RV32IMZBS-NEXT: and s2, s2, a5
+; RV32IMZBS-NEXT: xor t3, t3, t5
; RV32IMZBS-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, t5
-; RV32IMZBS-NEXT: xor a1, a3, a1
-; RV32IMZBS-NEXT: lw a3, 80(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: lw a4, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, s0, a4
-; RV32IMZBS-NEXT: lw t5, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, s3, t5
+; RV32IMZBS-NEXT: xor t5, s0, t5
+; RV32IMZBS-NEXT: xor a7, a7, t0
+; RV32IMZBS-NEXT: lw t0, 88(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t1, t0
; RV32IMZBS-NEXT: xor a0, a0, a2
-; RV32IMZBS-NEXT: lw a2, 44(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, s1, a2
-; RV32IMZBS-NEXT: lw t6, 32(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t3, t6
-; RV32IMZBS-NEXT: xor t4, t4, s9
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: lw a6, 112(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 80(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a4, a2
+; RV32IMZBS-NEXT: and a4, t4, a5
+; RV32IMZBS-NEXT: slli s1, s1, 2
+; RV32IMZBS-NEXT: and t1, t6, a5
+; RV32IMZBS-NEXT: slli s2, s2, 2
+; RV32IMZBS-NEXT: xor t3, t3, t5
+; RV32IMZBS-NEXT: xor a7, a7, t0
+; RV32IMZBS-NEXT: xor a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or a4, a4, s1
+; RV32IMZBS-NEXT: or a0, t1, s2
+; RV32IMZBS-NEXT: xor a2, t3, t2
; RV32IMZBS-NEXT: xor a6, a7, a6
-; RV32IMZBS-NEXT: lw a7, 108(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, t0, a7
-; RV32IMZBS-NEXT: xor a3, a1, a3
-; RV32IMZBS-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a1
-; RV32IMZBS-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t5, a1
-; RV32IMZBS-NEXT: xor a2, a0, a2
-; RV32IMZBS-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t3, a0
-; RV32IMZBS-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, t4, a0
-; RV32IMZBS-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 61681
-; RV32IMZBS-NEXT: addi t5, a1, -241
-; RV32IMZBS-NEXT: srli t4, t2, 4
-; RV32IMZBS-NEXT: and t2, t2, t5
-; RV32IMZBS-NEXT: and t4, t4, t5
-; RV32IMZBS-NEXT: slli t2, t2, 4
-; RV32IMZBS-NEXT: or t2, t4, t2
-; RV32IMZBS-NEXT: srli t4, t1, 4
-; RV32IMZBS-NEXT: and t1, t1, t5
-; RV32IMZBS-NEXT: and t4, t4, t5
-; RV32IMZBS-NEXT: slli t1, t1, 4
-; RV32IMZBS-NEXT: or t1, t4, t1
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: xor a3, a3, a4
-; RV32IMZBS-NEXT: xor a0, a2, t3
-; RV32IMZBS-NEXT: sw a0, 204(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a4, a5, a7
-; RV32IMZBS-NEXT: xor a3, a3, t0
-; RV32IMZBS-NEXT: xor a3, a3, a4
-; RV32IMZBS-NEXT: sw a3, 200(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a4, 209715
-; RV32IMZBS-NEXT: addi t4, a4, 819
-; RV32IMZBS-NEXT: srli a5, t2, 2
-; RV32IMZBS-NEXT: and a6, t2, t4
-; RV32IMZBS-NEXT: and a5, a5, t4
-; RV32IMZBS-NEXT: slli a6, a6, 2
-; RV32IMZBS-NEXT: or a7, a5, a6
-; RV32IMZBS-NEXT: srli a5, t1, 2
-; RV32IMZBS-NEXT: and a6, t1, t4
-; RV32IMZBS-NEXT: and a5, a5, t4
-; RV32IMZBS-NEXT: slli a6, a6, 2
-; RV32IMZBS-NEXT: or t0, a5, a6
-; RV32IMZBS-NEXT: lui t6, 349525
-; RV32IMZBS-NEXT: addi t6, t6, 1365
-; RV32IMZBS-NEXT: srli t1, a7, 1
-; RV32IMZBS-NEXT: and a7, a7, t6
-; RV32IMZBS-NEXT: and t1, t1, t6
-; RV32IMZBS-NEXT: slli a7, a7, 1
-; RV32IMZBS-NEXT: or a7, t1, a7
-; RV32IMZBS-NEXT: srli t1, t0, 1
-; RV32IMZBS-NEXT: and t0, t0, t6
-; RV32IMZBS-NEXT: and t1, t1, t6
-; RV32IMZBS-NEXT: slli t0, t0, 1
-; RV32IMZBS-NEXT: or t0, t1, t0
-; RV32IMZBS-NEXT: lw a0, 212(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and s0, t0, a0
-; RV32IMZBS-NEXT: lui a0, 1
-; RV32IMZBS-NEXT: and s2, t0, a0
-; RV32IMZBS-NEXT: lui a0, 2
-; RV32IMZBS-NEXT: and s3, t0, a0
-; RV32IMZBS-NEXT: lui a0, 4
-; RV32IMZBS-NEXT: and s5, t0, a0
-; RV32IMZBS-NEXT: lui a0, 8
-; RV32IMZBS-NEXT: and s6, t0, a0
-; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: and s10, t0, a0
-; RV32IMZBS-NEXT: lui a0, 32
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 64
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 192(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 128
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 188(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 256
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 512
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 180(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 1024
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 176(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 2048
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 172(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 4096
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 168(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 8192
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 164(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 16384
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 160(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 32768
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 156(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 65536
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 152(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 131072
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 262144
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 140(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, t0, 2
-; RV32IMZBS-NEXT: andi a1, t0, 1
-; RV32IMZBS-NEXT: andi a2, t0, 4
-; RV32IMZBS-NEXT: andi a3, t0, 8
-; RV32IMZBS-NEXT: andi a4, t0, 16
-; RV32IMZBS-NEXT: andi a5, t0, 32
-; RV32IMZBS-NEXT: andi a6, t0, 64
-; RV32IMZBS-NEXT: andi t1, t0, 128
-; RV32IMZBS-NEXT: andi t2, t0, 256
-; RV32IMZBS-NEXT: andi t3, t0, 512
-; RV32IMZBS-NEXT: andi t0, t0, 1024
-; RV32IMZBS-NEXT: mul a0, a7, a0
-; RV32IMZBS-NEXT: sw a0, 120(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul ra, a7, a1
-; RV32IMZBS-NEXT: mul s11, a7, a2
-; RV32IMZBS-NEXT: mul s8, a7, a3
-; RV32IMZBS-NEXT: mul s7, a7, a4
-; RV32IMZBS-NEXT: mul s4, a7, a5
-; RV32IMZBS-NEXT: mul a0, a7, a6
-; RV32IMZBS-NEXT: sw a0, 124(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a7, t1
-; RV32IMZBS-NEXT: sw a0, 212(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, a7, t2
-; RV32IMZBS-NEXT: mul t3, a7, t3
-; RV32IMZBS-NEXT: mul a0, a7, t0
-; RV32IMZBS-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a7, s0
-; RV32IMZBS-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a7, s2
-; RV32IMZBS-NEXT: sw a0, 136(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t1, a7, s3
-; RV32IMZBS-NEXT: mul a6, a7, s5
-; RV32IMZBS-NEXT: mul s2, a7, s6
-; RV32IMZBS-NEXT: mul s10, a7, s10
-; RV32IMZBS-NEXT: lw a0, 196(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a7, a0
-; RV32IMZBS-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a7, a0
-; RV32IMZBS-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a7, a0
-; RV32IMZBS-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a7, a0
-; RV32IMZBS-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a5, a7, a0
-; RV32IMZBS-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t2, a7, a0
-; RV32IMZBS-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s6, a7, a0
-; RV32IMZBS-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a7, a0
-; RV32IMZBS-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a7, a0
-; RV32IMZBS-NEXT: lw a4, 160(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a7, a4
-; RV32IMZBS-NEXT: lw t0, 156(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a7, t0
-; RV32IMZBS-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s0, a7, s0
-; RV32IMZBS-NEXT: lw s3, 148(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s3, a7, s3
-; RV32IMZBS-NEXT: lw s5, 144(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s5, a7, s5
-; RV32IMZBS-NEXT: lw s9, 140(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a7, a7, s9
-; RV32IMZBS-NEXT: lw s9, 120(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor ra, ra, s9
-; RV32IMZBS-NEXT: xor s8, s11, s8
-; RV32IMZBS-NEXT: xor s4, s7, s4
+; RV32IMZBS-NEXT: srli a7, a4, 1
+; RV32IMZBS-NEXT: and a4, a4, a3
+; RV32IMZBS-NEXT: srli t0, a0, 1
+; RV32IMZBS-NEXT: and a0, a0, a3
+; RV32IMZBS-NEXT: xor a2, a6, a2
+; RV32IMZBS-NEXT: sw a2, 332(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a7, a3
+; RV32IMZBS-NEXT: slli a4, a4, 1
+; RV32IMZBS-NEXT: and a7, t0, a3
+; RV32IMZBS-NEXT: slli a0, a0, 1
+; RV32IMZBS-NEXT: or t3, a2, a4
+; RV32IMZBS-NEXT: or t1, a7, a0
+; RV32IMZBS-NEXT: srli a0, a0, 31
+; RV32IMZBS-NEXT: slli a2, t3, 1
+; RV32IMZBS-NEXT: andi a4, t1, 2
+; RV32IMZBS-NEXT: slli a7, t3, 2
+; RV32IMZBS-NEXT: andi s6, t1, 4
+; RV32IMZBS-NEXT: slli t0, t3, 3
+; RV32IMZBS-NEXT: andi t5, t1, 8
+; RV32IMZBS-NEXT: slli t6, t3, 4
+; RV32IMZBS-NEXT: andi s0, t1, 16
+; RV32IMZBS-NEXT: slli s1, t3, 5
+; RV32IMZBS-NEXT: andi s2, t1, 32
+; RV32IMZBS-NEXT: slli t4, t3, 6
+; RV32IMZBS-NEXT: andi s4, t1, 64
+; RV32IMZBS-NEXT: slli t2, t3, 7
+; RV32IMZBS-NEXT: andi s3, t1, 128
+; RV32IMZBS-NEXT: slli s5, t3, 8
+; RV32IMZBS-NEXT: andi s8, t1, 256
+; RV32IMZBS-NEXT: slli a6, t3, 31
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: and a0, a0, a6
+; RV32IMZBS-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a0, t3, 9
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a2, a4, a2
+; RV32IMZBS-NEXT: sw a2, 324(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a2, t1, 512
+; RV32IMZBS-NEXT: seqz a4, s6
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a4, a4, a7
+; RV32IMZBS-NEXT: sw a4, 320(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, t3, 10
+; RV32IMZBS-NEXT: seqz a7, t5
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: and a6, a7, t0
+; RV32IMZBS-NEXT: sw a6, 316(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a7, t1, 1024
+; RV32IMZBS-NEXT: seqz t5, s0
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and a6, t5, t6
+; RV32IMZBS-NEXT: sw a6, 312(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t5, t3, 11
+; RV32IMZBS-NEXT: seqz t6, s2
+; RV32IMZBS-NEXT: addi t6, t6, -1
+; RV32IMZBS-NEXT: and a6, t6, s1
+; RV32IMZBS-NEXT: sw a6, 304(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not s2, t1
+; RV32IMZBS-NEXT: seqz t6, s4
+; RV32IMZBS-NEXT: addi t6, t6, -1
+; RV32IMZBS-NEXT: and a6, t6, t4
+; RV32IMZBS-NEXT: sw a6, 308(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s10, t3, 12
+; RV32IMZBS-NEXT: seqz s1, s3
+; RV32IMZBS-NEXT: addi s1, s1, -1
+; RV32IMZBS-NEXT: and a6, s1, t2
+; RV32IMZBS-NEXT: sw a6, 296(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t3, 13
+; RV32IMZBS-NEXT: seqz s1, s8
+; RV32IMZBS-NEXT: addi s1, s1, -1
+; RV32IMZBS-NEXT: and s1, s1, s5
+; RV32IMZBS-NEXT: slli t0, t3, 14
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 292(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a0, t3, 15
+; RV32IMZBS-NEXT: seqz a2, a7
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 300(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, s2, 11
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and t5, a2, t5
+; RV32IMZBS-NEXT: bexti a2, s2, 12
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and s10, a2, s10
+; RV32IMZBS-NEXT: bexti a2, s2, 13
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and s8, a2, a6
+; RV32IMZBS-NEXT: bexti a2, s2, 14
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and t6, a2, t0
+; RV32IMZBS-NEXT: bexti a2, s2, 15
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and s0, a2, a0
+; RV32IMZBS-NEXT: bexti a0, s2, 16
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a4, t3, 16
+; RV32IMZBS-NEXT: and a4, a0, a4
+; RV32IMZBS-NEXT: bexti a0, s2, 17
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a2, t3, 17
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: bexti a2, s2, 18
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a7, t3, 18
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: bexti a7, s2, 19
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: slli s11, t3, 19
+; RV32IMZBS-NEXT: and s11, a7, s11
+; RV32IMZBS-NEXT: bexti a7, s2, 20
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: slli ra, t3, 20
+; RV32IMZBS-NEXT: and ra, a7, ra
+; RV32IMZBS-NEXT: bexti a7, s2, 21
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: slli a6, t3, 21
+; RV32IMZBS-NEXT: and a7, a7, a6
+; RV32IMZBS-NEXT: bexti a6, s2, 22
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: slli t0, t3, 22
+; RV32IMZBS-NEXT: and a6, a6, t0
+; RV32IMZBS-NEXT: bexti t0, s2, 23
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: slli t4, t3, 23
+; RV32IMZBS-NEXT: and t0, t0, t4
+; RV32IMZBS-NEXT: bexti t4, s2, 24
+; RV32IMZBS-NEXT: addi t4, t4, -1
+; RV32IMZBS-NEXT: slli s3, t3, 24
+; RV32IMZBS-NEXT: and t4, t4, s3
+; RV32IMZBS-NEXT: bexti s3, s2, 25
+; RV32IMZBS-NEXT: addi s3, s3, -1
+; RV32IMZBS-NEXT: slli t2, t3, 25
+; RV32IMZBS-NEXT: and t2, s3, t2
+; RV32IMZBS-NEXT: bexti s3, s2, 26
+; RV32IMZBS-NEXT: addi s3, s3, -1
+; RV32IMZBS-NEXT: slli s5, t3, 26
+; RV32IMZBS-NEXT: and s3, s3, s5
+; RV32IMZBS-NEXT: bexti s5, s2, 27
+; RV32IMZBS-NEXT: addi s5, s5, -1
+; RV32IMZBS-NEXT: slli s6, t3, 27
+; RV32IMZBS-NEXT: and s5, s5, s6
+; RV32IMZBS-NEXT: bexti s6, s2, 28
+; RV32IMZBS-NEXT: addi s6, s6, -1
+; RV32IMZBS-NEXT: slli s7, t3, 28
+; RV32IMZBS-NEXT: and s6, s6, s7
+; RV32IMZBS-NEXT: bexti s7, s2, 29
+; RV32IMZBS-NEXT: addi s7, s7, -1
+; RV32IMZBS-NEXT: slli s9, t3, 29
+; RV32IMZBS-NEXT: and s7, s7, s9
+; RV32IMZBS-NEXT: andi t1, t1, 1
+; RV32IMZBS-NEXT: seqz t1, t1
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: and t1, t1, t3
+; RV32IMZBS-NEXT: slli t3, t3, 30
+; RV32IMZBS-NEXT: bexti s2, s2, 30
+; RV32IMZBS-NEXT: addi s2, s2, -1
+; RV32IMZBS-NEXT: and t3, s2, t3
+; RV32IMZBS-NEXT: lw s2, 324(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t1, s2
+; RV32IMZBS-NEXT: lw s2, 320(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 316(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s2, s9
+; RV32IMZBS-NEXT: lw s9, 312(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s4, 304(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s9, s9, s4
+; RV32IMZBS-NEXT: lw s4, 296(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s4, s1
+; RV32IMZBS-NEXT: xor t5, t5, s10
+; RV32IMZBS-NEXT: xor a0, a4, a0
+; RV32IMZBS-NEXT: xor a4, a6, t0
+; RV32IMZBS-NEXT: xor a6, s7, t3
+; RV32IMZBS-NEXT: xor t0, t1, s2
+; RV32IMZBS-NEXT: lw t1, 308(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, s9, t1
+; RV32IMZBS-NEXT: lw t3, 292(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t3, s1, t3
-; RV32IMZBS-NEXT: xor a6, t1, a6
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: xor a1, ra, s8
-; RV32IMZBS-NEXT: lw a3, 124(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, s4, a3
-; RV32IMZBS-NEXT: lw t1, 116(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t3, t1
-; RV32IMZBS-NEXT: xor a6, a6, s2
-; RV32IMZBS-NEXT: xor a2, a2, a5
-; RV32IMZBS-NEXT: xor a0, a0, a4
-; RV32IMZBS-NEXT: xor a1, a1, a3
-; RV32IMZBS-NEXT: lw a3, 132(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, t1, a3
-; RV32IMZBS-NEXT: xor a4, a6, s10
+; RV32IMZBS-NEXT: xor t5, t5, s8
+; RV32IMZBS-NEXT: xor a0, a0, a2
+; RV32IMZBS-NEXT: xor a2, a4, t4
+; RV32IMZBS-NEXT: lw a4, 328(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a6, a4
+; RV32IMZBS-NEXT: xor a6, t0, t1
+; RV32IMZBS-NEXT: lw t0, 300(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t3, t0
+; RV32IMZBS-NEXT: xor t1, t5, t6
+; RV32IMZBS-NEXT: xor a0, a0, s11
; RV32IMZBS-NEXT: xor a2, a2, t2
-; RV32IMZBS-NEXT: xor a0, a0, t0
-; RV32IMZBS-NEXT: lw a5, 212(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a5
-; RV32IMZBS-NEXT: lw a5, 136(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a5
-; RV32IMZBS-NEXT: lw a5, 128(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: xor a2, a2, s6
-; RV32IMZBS-NEXT: xor a0, a0, s0
-; RV32IMZBS-NEXT: lw a5, 196(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: xor a0, a0, s3
-; RV32IMZBS-NEXT: xor a3, a1, a3
-; RV32IMZBS-NEXT: xor a3, a3, a4
-; RV32IMZBS-NEXT: xor a0, a0, s5
-; RV32IMZBS-NEXT: xor a2, a3, a2
+; RV32IMZBS-NEXT: xor a6, a6, t0
+; RV32IMZBS-NEXT: xor t0, t1, s0
+; RV32IMZBS-NEXT: xor a0, a0, ra
+; RV32IMZBS-NEXT: xor a2, a2, s3
+; RV32IMZBS-NEXT: xor a6, a6, t0
; RV32IMZBS-NEXT: xor a0, a0, a7
-; RV32IMZBS-NEXT: lw a4, 216(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a3, a2, a4
-; RV32IMZBS-NEXT: xor a0, a2, a0
-; RV32IMZBS-NEXT: srli a2, a2, 8
-; RV32IMZBS-NEXT: and a2, a2, a4
-; RV32IMZBS-NEXT: slli a1, a1, 24
-; RV32IMZBS-NEXT: slli a3, a3, 8
-; RV32IMZBS-NEXT: or a1, a1, a3
-; RV32IMZBS-NEXT: srli a0, a0, 24
-; RV32IMZBS-NEXT: or a0, a2, a0
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: srli a1, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, t5
-; RV32IMZBS-NEXT: and a1, a1, t5
+; RV32IMZBS-NEXT: xor a2, a2, s5
+; RV32IMZBS-NEXT: xor a0, a6, a0
+; RV32IMZBS-NEXT: xor a2, a2, s6
+; RV32IMZBS-NEXT: xor a0, a0, a2
+; RV32IMZBS-NEXT: xor a0, a0, a4
+; RV32IMZBS-NEXT: srli a2, a0, 8
+; RV32IMZBS-NEXT: srli a4, a0, 24
+; RV32IMZBS-NEXT: lw a6, 344(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: or a2, a2, a4
+; RV32IMZBS-NEXT: and a4, a0, a6
+; RV32IMZBS-NEXT: slli a0, a0, 24
+; RV32IMZBS-NEXT: slli a4, a4, 8
+; RV32IMZBS-NEXT: or a0, a0, a4
+; RV32IMZBS-NEXT: or a0, a0, a2
+; RV32IMZBS-NEXT: srli a2, a0, 4
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: and a1, a2, a1
; RV32IMZBS-NEXT: slli a0, a0, 4
; RV32IMZBS-NEXT: or a0, a1, a0
; RV32IMZBS-NEXT: srli a1, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, t4
-; RV32IMZBS-NEXT: and a1, a1, t4
+; RV32IMZBS-NEXT: and a0, a0, a5
+; RV32IMZBS-NEXT: and a1, a1, a5
; RV32IMZBS-NEXT: slli a0, a0, 2
; RV32IMZBS-NEXT: or a0, a1, a0
; RV32IMZBS-NEXT: lui a1, 349525
; RV32IMZBS-NEXT: addi a1, a1, 1364
-; RV32IMZBS-NEXT: and a2, a0, t6
+; RV32IMZBS-NEXT: and a3, a0, a3
; RV32IMZBS-NEXT: srli a0, a0, 1
; RV32IMZBS-NEXT: and a0, a0, a1
-; RV32IMZBS-NEXT: slli a2, a2, 1
-; RV32IMZBS-NEXT: or a0, a0, a2
+; RV32IMZBS-NEXT: slli a3, a3, 1
+; RV32IMZBS-NEXT: or a0, a0, a3
; RV32IMZBS-NEXT: srli a0, a0, 1
-; RV32IMZBS-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a1, a0, a1
-; RV32IMZBS-NEXT: lw a0, 208(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 204(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 336(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a2, a0
-; RV32IMZBS-NEXT: lw ra, 268(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s0, 264(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s1, 260(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s2, 256(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s3, 252(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s4, 248(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s5, 244(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 240(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 236(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s8, 232(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s9, 228(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s10, 224(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s11, 220(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: addi sp, sp, 272
+; RV32IMZBS-NEXT: lw ra, 396(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 392(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 388(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 384(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s3, 380(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s4, 376(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s5, 372(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s6, 368(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 364(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 360(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 356(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s10, 352(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s11, 348(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: addi sp, sp, 400
; RV32IMZBS-NEXT: ret
;
; RV64IMZBS-LABEL: clmul_i64:
; RV64IMZBS: # %bb.0:
-; RV64IMZBS-NEXT: addi sp, sp, -544
-; RV64IMZBS-NEXT: sd ra, 536(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s0, 528(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s1, 520(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s2, 512(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s3, 504(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s4, 496(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s5, 488(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s6, 480(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s7, 472(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s8, 464(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s9, 456(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s10, 448(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s11, 440(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi t1, a1, 2
-; RV64IMZBS-NEXT: andi t2, a1, 1
-; RV64IMZBS-NEXT: andi a5, a1, 4
-; RV64IMZBS-NEXT: andi a7, a1, 8
-; RV64IMZBS-NEXT: andi a4, a1, 16
-; RV64IMZBS-NEXT: andi a6, a1, 32
-; RV64IMZBS-NEXT: andi a2, a1, 128
-; RV64IMZBS-NEXT: andi a3, a1, 256
-; RV64IMZBS-NEXT: bseti t0, zero, 11
-; RV64IMZBS-NEXT: lui t3, 1
-; RV64IMZBS-NEXT: lui t4, 2
-; RV64IMZBS-NEXT: lui t5, 4
-; RV64IMZBS-NEXT: lui t6, 8
-; RV64IMZBS-NEXT: lui s0, 16
-; RV64IMZBS-NEXT: lui s1, 32
-; RV64IMZBS-NEXT: lui s2, 64
-; RV64IMZBS-NEXT: lui s3, 128
-; RV64IMZBS-NEXT: lui s4, 256
-; RV64IMZBS-NEXT: lui s5, 512
-; RV64IMZBS-NEXT: lui s6, 1024
-; RV64IMZBS-NEXT: lui s7, 2048
-; RV64IMZBS-NEXT: lui s8, 4096
-; RV64IMZBS-NEXT: lui s9, 8192
-; RV64IMZBS-NEXT: lui s10, 16384
-; RV64IMZBS-NEXT: lui s11, 32768
-; RV64IMZBS-NEXT: lui ra, 65536
-; RV64IMZBS-NEXT: mul t1, a0, t1
-; RV64IMZBS-NEXT: mul t2, a0, t2
-; RV64IMZBS-NEXT: xor t1, t2, t1
-; RV64IMZBS-NEXT: sd t1, 432(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui t1, 131072
-; RV64IMZBS-NEXT: mul a5, a0, a5
-; RV64IMZBS-NEXT: mul a7, a0, a7
-; RV64IMZBS-NEXT: xor a5, a5, a7
-; RV64IMZBS-NEXT: sd a5, 424(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 262144
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: mul a6, a0, a6
-; RV64IMZBS-NEXT: xor a4, a4, a6
-; RV64IMZBS-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 32
-; RV64IMZBS-NEXT: mul a2, a0, a2
-; RV64IMZBS-NEXT: mul a3, a0, a3
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: sd a2, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 33
-; RV64IMZBS-NEXT: and t2, a1, t0
-; RV64IMZBS-NEXT: and t3, a1, t3
-; RV64IMZBS-NEXT: and t4, a1, t4
-; RV64IMZBS-NEXT: and t5, a1, t5
-; RV64IMZBS-NEXT: and t6, a1, t6
-; RV64IMZBS-NEXT: and s0, a1, s0
-; RV64IMZBS-NEXT: sd s0, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s1, a1, s1
-; RV64IMZBS-NEXT: and s2, a1, s2
-; RV64IMZBS-NEXT: and s3, a1, s3
-; RV64IMZBS-NEXT: and s4, a1, s4
-; RV64IMZBS-NEXT: and s5, a1, s5
-; RV64IMZBS-NEXT: and a3, a1, s6
-; RV64IMZBS-NEXT: sd a3, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s7, a1, s7
-; RV64IMZBS-NEXT: and a3, a1, s8
-; RV64IMZBS-NEXT: sd a3, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s9, a1, s9
-; RV64IMZBS-NEXT: and a3, a1, s10
-; RV64IMZBS-NEXT: sd a3, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a3, a1, s11
-; RV64IMZBS-NEXT: sd a3, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and ra, a1, ra
-; RV64IMZBS-NEXT: and t1, a1, t1
-; RV64IMZBS-NEXT: and a5, a1, a5
-; RV64IMZBS-NEXT: sd a5, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a5, a1, a4
-; RV64IMZBS-NEXT: and t0, a1, a2
-; RV64IMZBS-NEXT: bseti a2, zero, 34
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 35
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 36
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 37
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 38
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 39
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 40
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 41
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 42
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 43
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 44
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 45
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 46
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 47
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 48
-; RV64IMZBS-NEXT: and a4, a1, a2
-; RV64IMZBS-NEXT: bseti a2, zero, 49
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 240(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 50
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 232(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 51
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 224(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 52
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 53
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 54
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 55
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 192(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 56
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 184(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 57
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 176(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 58
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 168(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 59
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 152(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 60
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s0, zero, 61
-; RV64IMZBS-NEXT: and s0, a1, s0
-; RV64IMZBS-NEXT: sd s0, 104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 62
-; RV64IMZBS-NEXT: and a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 72(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a2, a1, 64
-; RV64IMZBS-NEXT: andi a3, a1, 512
+; RV64IMZBS-NEXT: addi sp, sp, -464
+; RV64IMZBS-NEXT: sd ra, 456(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s0, 448(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 440(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 432(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s3, 424(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s4, 416(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s5, 408(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s6, 400(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s7, 392(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s8, 384(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s9, 376(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s10, 368(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s11, 360(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: mv a7, a0
+; RV64IMZBS-NEXT: slli s7, a0, 1
+; RV64IMZBS-NEXT: andi s11, a1, 2
+; RV64IMZBS-NEXT: slli s8, a0, 2
+; RV64IMZBS-NEXT: andi ra, a1, 4
+; RV64IMZBS-NEXT: slli s6, a0, 3
+; RV64IMZBS-NEXT: andi s10, a1, 8
+; RV64IMZBS-NEXT: slli s4, a0, 4
+; RV64IMZBS-NEXT: andi s9, a1, 16
+; RV64IMZBS-NEXT: slli t6, a0, 5
+; RV64IMZBS-NEXT: andi s5, a1, 32
+; RV64IMZBS-NEXT: slli t4, a0, 6
+; RV64IMZBS-NEXT: andi s2, a1, 64
+; RV64IMZBS-NEXT: slli t3, a0, 7
+; RV64IMZBS-NEXT: andi s0, a1, 128
+; RV64IMZBS-NEXT: slli a5, a0, 8
+; RV64IMZBS-NEXT: andi t1, a1, 256
+; RV64IMZBS-NEXT: slli a4, a0, 9
+; RV64IMZBS-NEXT: andi t0, a1, 512
+; RV64IMZBS-NEXT: slli a3, a0, 10
; RV64IMZBS-NEXT: andi a6, a1, 1024
-; RV64IMZBS-NEXT: srliw a7, a1, 31
+; RV64IMZBS-NEXT: slli a0, a0, 11
+; RV64IMZBS-NEXT: not a2, a1
+; RV64IMZBS-NEXT: slli t2, a7, 12
+; RV64IMZBS-NEXT: slli t5, a7, 13
+; RV64IMZBS-NEXT: slli s1, a7, 14
+; RV64IMZBS-NEXT: slli s3, a7, 15
+; RV64IMZBS-NEXT: seqz s11, s11
+; RV64IMZBS-NEXT: addi s11, s11, -1
+; RV64IMZBS-NEXT: and s7, s11, s7
+; RV64IMZBS-NEXT: sd s7, 352(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli s7, a7, 16
+; RV64IMZBS-NEXT: seqz s11, ra
+; RV64IMZBS-NEXT: addi s11, s11, -1
+; RV64IMZBS-NEXT: and s8, s11, s8
+; RV64IMZBS-NEXT: sd s8, 344(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli s8, a7, 17
+; RV64IMZBS-NEXT: seqz s10, s10
+; RV64IMZBS-NEXT: addi s10, s10, -1
+; RV64IMZBS-NEXT: and s6, s10, s6
+; RV64IMZBS-NEXT: sd s6, 336(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli s6, a7, 18
+; RV64IMZBS-NEXT: seqz s9, s9
+; RV64IMZBS-NEXT: addi s9, s9, -1
+; RV64IMZBS-NEXT: and s4, s9, s4
+; RV64IMZBS-NEXT: sd s4, 328(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli s4, a7, 19
+; RV64IMZBS-NEXT: seqz s5, s5
+; RV64IMZBS-NEXT: addi s5, s5, -1
+; RV64IMZBS-NEXT: and t6, s5, t6
+; RV64IMZBS-NEXT: sd t6, 312(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli t6, a7, 20
+; RV64IMZBS-NEXT: seqz s2, s2
+; RV64IMZBS-NEXT: addi s2, s2, -1
+; RV64IMZBS-NEXT: and t4, s2, t4
+; RV64IMZBS-NEXT: sd t4, 320(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli t4, a7, 21
+; RV64IMZBS-NEXT: seqz s0, s0
+; RV64IMZBS-NEXT: addi s0, s0, -1
+; RV64IMZBS-NEXT: and t3, s0, t3
+; RV64IMZBS-NEXT: sd t3, 304(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli t3, a7, 22
+; RV64IMZBS-NEXT: seqz t1, t1
+; RV64IMZBS-NEXT: addi t1, t1, -1
+; RV64IMZBS-NEXT: and a5, t1, a5
+; RV64IMZBS-NEXT: sd a5, 280(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a5, a7, 23
+; RV64IMZBS-NEXT: seqz t0, t0
+; RV64IMZBS-NEXT: addi t0, t0, -1
+; RV64IMZBS-NEXT: and a4, t0, a4
+; RV64IMZBS-NEXT: sd a4, 288(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a4, a7, 24
+; RV64IMZBS-NEXT: seqz a6, a6
+; RV64IMZBS-NEXT: addi a6, a6, -1
+; RV64IMZBS-NEXT: and a3, a6, a3
+; RV64IMZBS-NEXT: sd a3, 296(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 11
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, a0
+; RV64IMZBS-NEXT: sd a0, 232(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 12
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, t2
+; RV64IMZBS-NEXT: sd a0, 224(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 13
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, t5
+; RV64IMZBS-NEXT: sd a0, 248(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 14
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, s1
+; RV64IMZBS-NEXT: sd a3, 264(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 15
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, s3
+; RV64IMZBS-NEXT: sd a0, 272(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 16
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, s7
+; RV64IMZBS-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 17
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, s8
+; RV64IMZBS-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 18
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, s6
+; RV64IMZBS-NEXT: sd a0, 208(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 19
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, s4
+; RV64IMZBS-NEXT: sd a0, 216(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 20
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, t6
+; RV64IMZBS-NEXT: sd a0, 240(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 21
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, t4
+; RV64IMZBS-NEXT: sd a0, 256(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 22
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a0, a3, t3
+; RV64IMZBS-NEXT: sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 23
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 136(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 24
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 152(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 25
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 25
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 160(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 26
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 26
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 168(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 27
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 27
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 184(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 28
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 28
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 176(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 29
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 29
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 88(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 30
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 30
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 72(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sraiw a3, a1, 31
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 31
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 80(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 32
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 32
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 96(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 33
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 33
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 112(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 34
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 34
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 128(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 35
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 35
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 120(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 36
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 36
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 104(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 37
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 37
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 38
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli s1, a7, 38
+; RV64IMZBS-NEXT: and s11, a3, s1
+; RV64IMZBS-NEXT: bexti a3, a2, 39
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 39
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 8(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 40
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 40
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 41
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 41
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 42
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 42
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 64(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 43
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 43
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 56(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 44
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 44
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 45
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 45
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a2, 46
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 46
+; RV64IMZBS-NEXT: and s4, a3, a4
+; RV64IMZBS-NEXT: bexti a3, a2, 47
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 47
+; RV64IMZBS-NEXT: and s1, a3, a4
+; RV64IMZBS-NEXT: bexti a3, a2, 48
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 48
+; RV64IMZBS-NEXT: and s3, a3, a4
+; RV64IMZBS-NEXT: bexti a3, a2, 49
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 49
+; RV64IMZBS-NEXT: and s6, a3, a4
+; RV64IMZBS-NEXT: bexti a3, a2, 50
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 50
+; RV64IMZBS-NEXT: and s8, a3, a4
+; RV64IMZBS-NEXT: bexti a3, a2, 51
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli s0, a7, 51
+; RV64IMZBS-NEXT: and s9, a3, s0
+; RV64IMZBS-NEXT: bexti a3, a2, 52
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 52
+; RV64IMZBS-NEXT: and s10, a3, a4
+; RV64IMZBS-NEXT: bexti a3, a2, 53
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 53
+; RV64IMZBS-NEXT: and s7, a3, a4
+; RV64IMZBS-NEXT: bexti a3, a2, 54
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 54
+; RV64IMZBS-NEXT: and s5, a3, a4
+; RV64IMZBS-NEXT: bexti a3, a2, 55
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 55
+; RV64IMZBS-NEXT: and s2, a3, a4
+; RV64IMZBS-NEXT: bexti a3, a2, 56
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a7, 56
+; RV64IMZBS-NEXT: and t3, a3, a4
+; RV64IMZBS-NEXT: bexti a4, a2, 57
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: slli a5, a7, 57
+; RV64IMZBS-NEXT: and t1, a4, a5
+; RV64IMZBS-NEXT: bexti a5, a2, 58
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: slli a6, a7, 58
+; RV64IMZBS-NEXT: and t2, a5, a6
+; RV64IMZBS-NEXT: bexti a5, a2, 59
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: slli a6, a7, 59
+; RV64IMZBS-NEXT: and t4, a5, a6
+; RV64IMZBS-NEXT: bexti a6, a2, 60
+; RV64IMZBS-NEXT: addi a6, a6, -1
+; RV64IMZBS-NEXT: slli t0, a7, 60
+; RV64IMZBS-NEXT: and t5, a6, t0
+; RV64IMZBS-NEXT: bexti t0, a2, 61
+; RV64IMZBS-NEXT: addi t0, t0, -1
+; RV64IMZBS-NEXT: slli a0, a7, 61
+; RV64IMZBS-NEXT: and t0, t0, a0
+; RV64IMZBS-NEXT: bexti a0, a2, 62
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, a7, 62
+; RV64IMZBS-NEXT: and t6, a0, a2
+; RV64IMZBS-NEXT: andi a0, a1, 1
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and a0, a0, a7
+; RV64IMZBS-NEXT: slli a7, a7, 63
; RV64IMZBS-NEXT: srli a1, a1, 63
-; RV64IMZBS-NEXT: mul a2, a0, a2
-; RV64IMZBS-NEXT: sd a2, 48(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a2, a0, a3
-; RV64IMZBS-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a2, a0, a6
-; RV64IMZBS-NEXT: sd a2, 112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: slli a2, a7, 31
-; RV64IMZBS-NEXT: slli s8, a1, 63
-; RV64IMZBS-NEXT: mul a1, a0, t2
-; RV64IMZBS-NEXT: sd a1, 24(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, t3
-; RV64IMZBS-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, t4
-; RV64IMZBS-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, t5
-; RV64IMZBS-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, t6
-; RV64IMZBS-NEXT: sd a1, 0(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s10, a0, a1
-; RV64IMZBS-NEXT: mul a1, a0, s1
-; RV64IMZBS-NEXT: sd a1, 64(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, s2
-; RV64IMZBS-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, s3
-; RV64IMZBS-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul s6, a0, s4
-; RV64IMZBS-NEXT: mul s5, a0, s5
-; RV64IMZBS-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, s7
-; RV64IMZBS-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, s9
-; RV64IMZBS-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s1, a0, a1
-; RV64IMZBS-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s0, a0, a1
-; RV64IMZBS-NEXT: mul s9, a0, ra
-; RV64IMZBS-NEXT: mul a1, a0, t1
-; RV64IMZBS-NEXT: sd a1, 56(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 96(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, a2
-; RV64IMZBS-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, a5
-; RV64IMZBS-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t4, a0, t0
+; RV64IMZBS-NEXT: seqz a1, a1
+; RV64IMZBS-NEXT: addi a1, a1, -1
+; RV64IMZBS-NEXT: and s0, a1, a7
; RV64IMZBS-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, a0, a1
-; RV64IMZBS-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s3, a0, a1
+; RV64IMZBS-NEXT: xor a6, a0, a1
+; RV64IMZBS-NEXT: ld a0, 344(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: xor a7, a0, a1
+; RV64IMZBS-NEXT: ld a0, 328(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t2, a0, a1
-; RV64IMZBS-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t6, a0, a1
-; RV64IMZBS-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s4, a0, a1
-; RV64IMZBS-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul ra, a0, a1
-; RV64IMZBS-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, a4
-; RV64IMZBS-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a7, a0, a1
-; RV64IMZBS-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, a0, a1
-; RV64IMZBS-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t1, a0, a1
-; RV64IMZBS-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t5, a0, a1
-; RV64IMZBS-NEXT: ld a1, 200(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s2, a0, a1
-; RV64IMZBS-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s7, a0, a1
-; RV64IMZBS-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a1
-; RV64IMZBS-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a0, a1
-; RV64IMZBS-NEXT: ld a1, 72(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a6, a0, a1
-; RV64IMZBS-NEXT: mul a0, a0, s8
-; RV64IMZBS-NEXT: sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 432(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s8, a0, a1
-; RV64IMZBS-NEXT: ld a0, 416(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 48(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a2, 40(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 280(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a1, a2
-; RV64IMZBS-NEXT: ld a2, 24(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 16(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, s11
-; RV64IMZBS-NEXT: ld s11, 0(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s10, s11, s10
-; RV64IMZBS-NEXT: xor s5, s6, s5
-; RV64IMZBS-NEXT: xor s0, s1, s0
-; RV64IMZBS-NEXT: xor t3, t4, t3
-; RV64IMZBS-NEXT: xor t0, t2, t0
-; RV64IMZBS-NEXT: xor a5, a7, a5
-; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: xor a0, s8, a0
-; RV64IMZBS-NEXT: ld a4, 112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, a1, a4
-; RV64IMZBS-NEXT: ld a4, 80(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: ld a4, 64(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, s10, a4
-; RV64IMZBS-NEXT: ld a7, 32(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, s5, a7
-; RV64IMZBS-NEXT: xor t2, s0, s9
-; RV64IMZBS-NEXT: xor t3, t3, s3
-; RV64IMZBS-NEXT: xor t0, t0, t6
-; RV64IMZBS-NEXT: xor a5, a5, t1
-; RV64IMZBS-NEXT: xor a3, a3, a6
-; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, a2, a1
-; RV64IMZBS-NEXT: ld a2, 128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a4, a2
-; RV64IMZBS-NEXT: ld a4, 88(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a7, a4
-; RV64IMZBS-NEXT: ld a6, 56(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, t2, a6
+; RV64IMZBS-NEXT: ld a2, 232(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a3, 224(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a2, a3
+; RV64IMZBS-NEXT: ld a3, 200(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 192(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a3, a4
+; RV64IMZBS-NEXT: ld a4, 144(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a5, 136(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, a5
+; RV64IMZBS-NEXT: ld a5, 88(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, ra
+; RV64IMZBS-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s11, ra, s11
+; RV64IMZBS-NEXT: xor s1, s4, s1
+; RV64IMZBS-NEXT: xor t1, t3, t1
+; RV64IMZBS-NEXT: xor a6, a6, a7
+; RV64IMZBS-NEXT: ld a7, 320(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a0, a0, a7
+; RV64IMZBS-NEXT: ld a7, 288(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a1, a7
+; RV64IMZBS-NEXT: ld a7, 248(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a2, a7
+; RV64IMZBS-NEXT: ld a7, 208(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a3, a7
+; RV64IMZBS-NEXT: ld a7, 152(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, a7
+; RV64IMZBS-NEXT: ld a7, 80(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, a7
; RV64IMZBS-NEXT: ld a7, 8(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, t3, a7
-; RV64IMZBS-NEXT: xor t0, t0, s4
-; RV64IMZBS-NEXT: xor a5, a5, t5
-; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, a2, a1
-; RV64IMZBS-NEXT: ld a2, 136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a4, a2
-; RV64IMZBS-NEXT: ld a4, 96(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a6, a4
-; RV64IMZBS-NEXT: ld a6, 328(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, s11, a7
+; RV64IMZBS-NEXT: xor t3, s1, s3
+; RV64IMZBS-NEXT: xor t1, t1, t2
+; RV64IMZBS-NEXT: xor a0, a6, a0
+; RV64IMZBS-NEXT: ld a6, 296(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a1, a6
+; RV64IMZBS-NEXT: ld a6, 264(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a2, a6
+; RV64IMZBS-NEXT: ld a6, 216(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a3, a6
+; RV64IMZBS-NEXT: ld a6, 160(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, a6
+; RV64IMZBS-NEXT: ld a6, 96(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, a6
+; RV64IMZBS-NEXT: ld a6, 32(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a6, a7, a6
-; RV64IMZBS-NEXT: xor a7, t0, ra
-; RV64IMZBS-NEXT: xor a5, a5, s2
+; RV64IMZBS-NEXT: xor a7, t3, s6
+; RV64IMZBS-NEXT: xor t1, t1, t4
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a2, a1
-; RV64IMZBS-NEXT: ld a2, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a4, a2
-; RV64IMZBS-NEXT: ld a4, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a6, a4
-; RV64IMZBS-NEXT: ld a6, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a7, a6
-; RV64IMZBS-NEXT: xor a5, a5, s7
+; RV64IMZBS-NEXT: ld a2, 240(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 168(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 112(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: xor a6, a7, s8
+; RV64IMZBS-NEXT: xor a7, t1, t5
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a2, a1
-; RV64IMZBS-NEXT: ld a2, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a4, a2
-; RV64IMZBS-NEXT: ld a4, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a6, a4
-; RV64IMZBS-NEXT: ld a6, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, a6
+; RV64IMZBS-NEXT: ld a2, 184(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 128(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: xor a5, a6, s9
+; RV64IMZBS-NEXT: xor a6, a7, t0
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a2, a1
-; RV64IMZBS-NEXT: ld a2, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a4, a2
-; RV64IMZBS-NEXT: ld a4, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a2, 120(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 56(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: xor a4, a5, s10
+; RV64IMZBS-NEXT: xor a5, a6, t6
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a2, a1
-; RV64IMZBS-NEXT: ld a2, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a4, a2
+; RV64IMZBS-NEXT: ld a2, 40(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: xor a3, a4, s7
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: xor a2, a3, s5
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, a3, a1
+; RV64IMZBS-NEXT: xor a1, a2, s2
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld ra, 536(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 528(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 520(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s2, 512(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s3, 504(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s4, 496(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s5, 488(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s6, 480(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s7, 472(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s8, 464(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s9, 456(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s10, 448(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 440(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: addi sp, sp, 544
+; RV64IMZBS-NEXT: xor a5, a5, s0
+; RV64IMZBS-NEXT: xor a0, a0, a5
+; RV64IMZBS-NEXT: ld ra, 456(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 448(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 440(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 432(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 424(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s4, 416(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s5, 408(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s6, 400(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s7, 392(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s8, 384(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s9, 376(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s10, 368(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s11, 360(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 464
; RV64IMZBS-NEXT: ret
%res = call i64 @llvm.clmul.i64(i64 %a, i64 %b)
ret i64 %res
@@ -5632,63 +6718,195 @@ define void @commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; RV64I-NEXT: sb a0, 0(a3)
; RV64I-NEXT: ret
;
-; CHECK-M-LABEL: commutative_clmul_i8:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: andi a4, a1, 2
-; CHECK-M-NEXT: andi a5, a1, 1
-; CHECK-M-NEXT: andi a6, a1, 4
-; CHECK-M-NEXT: andi a7, a1, 8
-; CHECK-M-NEXT: andi t0, a1, 16
-; CHECK-M-NEXT: andi t1, a1, 32
-; CHECK-M-NEXT: andi t2, a1, 64
-; CHECK-M-NEXT: andi a1, a1, -128
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: mul a6, a0, a6
-; CHECK-M-NEXT: mul a7, a0, a7
-; CHECK-M-NEXT: mul t0, a0, t0
-; CHECK-M-NEXT: mul t1, a0, t1
-; CHECK-M-NEXT: mul t2, a0, t2
-; CHECK-M-NEXT: mul a0, a0, a1
-; CHECK-M-NEXT: xor a4, a5, a4
-; CHECK-M-NEXT: xor a1, a6, a7
-; CHECK-M-NEXT: xor a5, t0, t1
-; CHECK-M-NEXT: xor a1, a4, a1
-; CHECK-M-NEXT: xor a4, a5, t2
-; CHECK-M-NEXT: xor a1, a1, a4
-; CHECK-M-NEXT: xor a0, a1, a0
-; CHECK-M-NEXT: sb a0, 0(a2)
-; CHECK-M-NEXT: sb a0, 0(a3)
-; CHECK-M-NEXT: ret
+; RV32IM-LABEL: commutative_clmul_i8:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: slli a4, a1, 30
+; RV32IM-NEXT: slli a5, a0, 1
+; RV32IM-NEXT: slli a6, a1, 29
+; RV32IM-NEXT: slli a7, a0, 2
+; RV32IM-NEXT: slli t0, a1, 28
+; RV32IM-NEXT: slli t1, a0, 3
+; RV32IM-NEXT: slli t2, a1, 27
+; RV32IM-NEXT: srli a4, a4, 31
+; RV32IM-NEXT: neg a4, a4
+; RV32IM-NEXT: and a4, a4, a5
+; RV32IM-NEXT: slli a5, a0, 4
+; RV32IM-NEXT: srli a6, a6, 31
+; RV32IM-NEXT: neg a6, a6
+; RV32IM-NEXT: and a6, a6, a7
+; RV32IM-NEXT: slli a7, a1, 26
+; RV32IM-NEXT: srli t0, t0, 31
+; RV32IM-NEXT: neg t0, t0
+; RV32IM-NEXT: and t0, t0, t1
+; RV32IM-NEXT: slli t1, a0, 5
+; RV32IM-NEXT: srli t2, t2, 31
+; RV32IM-NEXT: neg t2, t2
+; RV32IM-NEXT: and a5, t2, a5
+; RV32IM-NEXT: slli t2, a1, 25
+; RV32IM-NEXT: srli a7, a7, 31
+; RV32IM-NEXT: neg a7, a7
+; RV32IM-NEXT: and a7, a7, t1
+; RV32IM-NEXT: slli t1, a0, 6
+; RV32IM-NEXT: srli t2, t2, 31
+; RV32IM-NEXT: neg t2, t2
+; RV32IM-NEXT: and t1, t2, t1
+; RV32IM-NEXT: slli t2, a1, 31
+; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: srai t2, t2, 31
+; RV32IM-NEXT: and t2, t2, a0
+; RV32IM-NEXT: slli a0, a0, 7
+; RV32IM-NEXT: srli a1, a1, 31
+; RV32IM-NEXT: neg a1, a1
+; RV32IM-NEXT: and a0, a1, a0
+; RV32IM-NEXT: xor a1, t2, a4
+; RV32IM-NEXT: xor a4, a6, t0
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: xor a1, a1, a4
+; RV32IM-NEXT: xor a4, a5, t1
+; RV32IM-NEXT: xor a1, a1, a4
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sb a0, 0(a2)
+; RV32IM-NEXT: sb a0, 0(a3)
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: commutative_clmul_i8:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: slli a4, a1, 62
+; RV64IM-NEXT: slli a5, a0, 1
+; RV64IM-NEXT: slli a6, a1, 61
+; RV64IM-NEXT: slli a7, a0, 2
+; RV64IM-NEXT: slli t0, a1, 60
+; RV64IM-NEXT: slli t1, a0, 3
+; RV64IM-NEXT: slli t2, a1, 59
+; RV64IM-NEXT: srli a4, a4, 63
+; RV64IM-NEXT: neg a4, a4
+; RV64IM-NEXT: and a4, a4, a5
+; RV64IM-NEXT: slli a5, a0, 4
+; RV64IM-NEXT: srli a6, a6, 63
+; RV64IM-NEXT: neg a6, a6
+; RV64IM-NEXT: and a6, a6, a7
+; RV64IM-NEXT: slli a7, a1, 58
+; RV64IM-NEXT: srli t0, t0, 63
+; RV64IM-NEXT: neg t0, t0
+; RV64IM-NEXT: and t0, t0, t1
+; RV64IM-NEXT: slli t1, a0, 5
+; RV64IM-NEXT: srli t2, t2, 63
+; RV64IM-NEXT: neg t2, t2
+; RV64IM-NEXT: and a5, t2, a5
+; RV64IM-NEXT: slli t2, a1, 57
+; RV64IM-NEXT: srli a7, a7, 63
+; RV64IM-NEXT: neg a7, a7
+; RV64IM-NEXT: and a7, a7, t1
+; RV64IM-NEXT: slli t1, a0, 6
+; RV64IM-NEXT: srli t2, t2, 63
+; RV64IM-NEXT: neg t2, t2
+; RV64IM-NEXT: and t1, t2, t1
+; RV64IM-NEXT: slli t2, a1, 63
+; RV64IM-NEXT: slli a1, a1, 56
+; RV64IM-NEXT: srai t2, t2, 63
+; RV64IM-NEXT: and t2, t2, a0
+; RV64IM-NEXT: slli a0, a0, 7
+; RV64IM-NEXT: srli a1, a1, 63
+; RV64IM-NEXT: neg a1, a1
+; RV64IM-NEXT: and a0, a1, a0
+; RV64IM-NEXT: xor a1, t2, a4
+; RV64IM-NEXT: xor a4, a6, t0
+; RV64IM-NEXT: xor a5, a5, a7
+; RV64IM-NEXT: xor a1, a1, a4
+; RV64IM-NEXT: xor a4, a5, t1
+; RV64IM-NEXT: xor a1, a1, a4
+; RV64IM-NEXT: xor a0, a1, a0
+; RV64IM-NEXT: sb a0, 0(a2)
+; RV64IM-NEXT: sb a0, 0(a3)
+; RV64IM-NEXT: ret
+;
+; RV32IMZBS-LABEL: commutative_clmul_i8:
+; RV32IMZBS: # %bb.0:
+; RV32IMZBS-NEXT: bexti a4, a1, 1
+; RV32IMZBS-NEXT: slli a5, a0, 1
+; RV32IMZBS-NEXT: bexti a6, a1, 2
+; RV32IMZBS-NEXT: slli a7, a0, 2
+; RV32IMZBS-NEXT: bexti t0, a1, 3
+; RV32IMZBS-NEXT: slli t1, a0, 3
+; RV32IMZBS-NEXT: bexti t2, a1, 4
+; RV32IMZBS-NEXT: neg a4, a4
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: slli a5, a0, 4
+; RV32IMZBS-NEXT: neg a6, a6
+; RV32IMZBS-NEXT: and a6, a6, a7
+; RV32IMZBS-NEXT: bexti a7, a1, 5
+; RV32IMZBS-NEXT: neg t0, t0
+; RV32IMZBS-NEXT: and t0, t0, t1
+; RV32IMZBS-NEXT: slli t1, a0, 5
+; RV32IMZBS-NEXT: neg t2, t2
+; RV32IMZBS-NEXT: and a5, t2, a5
+; RV32IMZBS-NEXT: bexti t2, a1, 6
+; RV32IMZBS-NEXT: neg a7, a7
+; RV32IMZBS-NEXT: and a7, a7, t1
+; RV32IMZBS-NEXT: slli t1, a0, 6
+; RV32IMZBS-NEXT: neg t2, t2
+; RV32IMZBS-NEXT: and t1, t2, t1
+; RV32IMZBS-NEXT: slli t2, a1, 31
+; RV32IMZBS-NEXT: bexti a1, a1, 7
+; RV32IMZBS-NEXT: srai t2, t2, 31
+; RV32IMZBS-NEXT: and t2, t2, a0
+; RV32IMZBS-NEXT: slli a0, a0, 7
+; RV32IMZBS-NEXT: neg a1, a1
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: xor a1, t2, a4
+; RV32IMZBS-NEXT: xor a4, a6, t0
+; RV32IMZBS-NEXT: xor a5, a5, a7
+; RV32IMZBS-NEXT: xor a1, a1, a4
+; RV32IMZBS-NEXT: xor a4, a5, t1
+; RV32IMZBS-NEXT: xor a1, a1, a4
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: sb a0, 0(a2)
+; RV32IMZBS-NEXT: sb a0, 0(a3)
+; RV32IMZBS-NEXT: ret
;
-; CHECK-ZBS-LABEL: commutative_clmul_i8:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: andi a4, a1, 2
-; CHECK-ZBS-NEXT: andi a5, a1, 1
-; CHECK-ZBS-NEXT: andi a6, a1, 4
-; CHECK-ZBS-NEXT: andi a7, a1, 8
-; CHECK-ZBS-NEXT: andi t0, a1, 16
-; CHECK-ZBS-NEXT: andi t1, a1, 32
-; CHECK-ZBS-NEXT: andi t2, a1, 64
-; CHECK-ZBS-NEXT: andi a1, a1, -128
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: mul a5, a0, a5
-; CHECK-ZBS-NEXT: mul a6, a0, a6
-; CHECK-ZBS-NEXT: mul a7, a0, a7
-; CHECK-ZBS-NEXT: mul t0, a0, t0
-; CHECK-ZBS-NEXT: mul t1, a0, t1
-; CHECK-ZBS-NEXT: mul t2, a0, t2
-; CHECK-ZBS-NEXT: mul a0, a0, a1
-; CHECK-ZBS-NEXT: xor a4, a5, a4
-; CHECK-ZBS-NEXT: xor a1, a6, a7
-; CHECK-ZBS-NEXT: xor a5, t0, t1
-; CHECK-ZBS-NEXT: xor a1, a4, a1
-; CHECK-ZBS-NEXT: xor a4, a5, t2
-; CHECK-ZBS-NEXT: xor a1, a1, a4
-; CHECK-ZBS-NEXT: xor a0, a1, a0
-; CHECK-ZBS-NEXT: sb a0, 0(a2)
-; CHECK-ZBS-NEXT: sb a0, 0(a3)
-; CHECK-ZBS-NEXT: ret
+; RV64IMZBS-LABEL: commutative_clmul_i8:
+; RV64IMZBS: # %bb.0:
+; RV64IMZBS-NEXT: bexti a4, a1, 1
+; RV64IMZBS-NEXT: slli a5, a0, 1
+; RV64IMZBS-NEXT: bexti a6, a1, 2
+; RV64IMZBS-NEXT: slli a7, a0, 2
+; RV64IMZBS-NEXT: bexti t0, a1, 3
+; RV64IMZBS-NEXT: slli t1, a0, 3
+; RV64IMZBS-NEXT: bexti t2, a1, 4
+; RV64IMZBS-NEXT: neg a4, a4
+; RV64IMZBS-NEXT: and a4, a4, a5
+; RV64IMZBS-NEXT: slli a5, a0, 4
+; RV64IMZBS-NEXT: neg a6, a6
+; RV64IMZBS-NEXT: and a6, a6, a7
+; RV64IMZBS-NEXT: bexti a7, a1, 5
+; RV64IMZBS-NEXT: neg t0, t0
+; RV64IMZBS-NEXT: and t0, t0, t1
+; RV64IMZBS-NEXT: slli t1, a0, 5
+; RV64IMZBS-NEXT: neg t2, t2
+; RV64IMZBS-NEXT: and a5, t2, a5
+; RV64IMZBS-NEXT: bexti t2, a1, 6
+; RV64IMZBS-NEXT: neg a7, a7
+; RV64IMZBS-NEXT: and a7, a7, t1
+; RV64IMZBS-NEXT: slli t1, a0, 6
+; RV64IMZBS-NEXT: neg t2, t2
+; RV64IMZBS-NEXT: and t1, t2, t1
+; RV64IMZBS-NEXT: slli t2, a1, 63
+; RV64IMZBS-NEXT: bexti a1, a1, 7
+; RV64IMZBS-NEXT: srai t2, t2, 63
+; RV64IMZBS-NEXT: and t2, t2, a0
+; RV64IMZBS-NEXT: slli a0, a0, 7
+; RV64IMZBS-NEXT: neg a1, a1
+; RV64IMZBS-NEXT: and a0, a1, a0
+; RV64IMZBS-NEXT: xor a1, t2, a4
+; RV64IMZBS-NEXT: xor a4, a6, t0
+; RV64IMZBS-NEXT: xor a5, a5, a7
+; RV64IMZBS-NEXT: xor a1, a1, a4
+; RV64IMZBS-NEXT: xor a4, a5, t1
+; RV64IMZBS-NEXT: xor a1, a1, a4
+; RV64IMZBS-NEXT: xor a0, a1, a0
+; RV64IMZBS-NEXT: sb a0, 0(a2)
+; RV64IMZBS-NEXT: sb a0, 0(a3)
+; RV64IMZBS-NEXT: ret
%xy = call i8 @llvm.clmul.i8(i8 %x, i8 %y)
%yx = call i8 @llvm.clmul.i8(i8 %y, i8 %x)
store i8 %xy, ptr %p0
@@ -5828,29 +7046,51 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
; RV32IM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IM-NEXT: mv s0, a3
-; RV32IM-NEXT: andi a3, a1, 2
-; RV32IM-NEXT: andi a4, a1, 1
-; RV32IM-NEXT: andi a5, a1, 4
-; RV32IM-NEXT: andi a6, a1, 8
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: xor a3, a4, a3
-; RV32IM-NEXT: andi a4, a1, 16
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: mul a6, a0, a6
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: andi a6, a1, 32
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: mul a6, a0, a6
+; RV32IM-NEXT: slli a3, a1, 30
+; RV32IM-NEXT: slli a4, a0, 1
+; RV32IM-NEXT: slli a5, a1, 29
+; RV32IM-NEXT: slli a6, a0, 2
+; RV32IM-NEXT: slli a7, a1, 28
+; RV32IM-NEXT: slli t0, a0, 3
+; RV32IM-NEXT: slli t1, a1, 27
+; RV32IM-NEXT: srli a3, a3, 31
+; RV32IM-NEXT: neg a3, a3
+; RV32IM-NEXT: and a3, a3, a4
+; RV32IM-NEXT: slli a4, a0, 4
+; RV32IM-NEXT: srli a5, a5, 31
+; RV32IM-NEXT: neg a5, a5
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: slli a6, a1, 26
+; RV32IM-NEXT: srli a7, a7, 31
+; RV32IM-NEXT: neg a7, a7
+; RV32IM-NEXT: and a7, a7, t0
+; RV32IM-NEXT: slli t0, a0, 5
+; RV32IM-NEXT: srli t1, t1, 31
+; RV32IM-NEXT: neg t1, t1
+; RV32IM-NEXT: and a4, t1, a4
+; RV32IM-NEXT: slli t1, a1, 25
+; RV32IM-NEXT: srli a6, a6, 31
+; RV32IM-NEXT: neg a6, a6
+; RV32IM-NEXT: and a6, a6, t0
+; RV32IM-NEXT: slli t0, a0, 6
+; RV32IM-NEXT: srli t1, t1, 31
+; RV32IM-NEXT: neg t1, t1
+; RV32IM-NEXT: and t0, t1, t0
+; RV32IM-NEXT: slli t1, a1, 31
+; RV32IM-NEXT: srai t1, t1, 31
+; RV32IM-NEXT: and t1, t1, a0
+; RV32IM-NEXT: xor a3, t1, a3
+; RV32IM-NEXT: xor a5, a5, a7
; RV32IM-NEXT: xor a4, a4, a6
+; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: slli a0, a0, 7
+; RV32IM-NEXT: srli a1, a1, 31
+; RV32IM-NEXT: neg a1, a1
; RV32IM-NEXT: xor a3, a3, a5
-; RV32IM-NEXT: andi a5, a1, 64
-; RV32IM-NEXT: andi a1, a1, -128
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: xor a4, a4, a5
+; RV32IM-NEXT: xor a4, a4, t0
; RV32IM-NEXT: xor a3, a3, a4
-; RV32IM-NEXT: mul s1, a0, a1
-; RV32IM-NEXT: xor s1, a3, s1
+; RV32IM-NEXT: and a0, a1, a0
+; RV32IM-NEXT: xor s1, a3, a0
; RV32IM-NEXT: sb s1, 0(a2)
; RV32IM-NEXT: mv a0, s1
; RV32IM-NEXT: call use
@@ -5868,29 +7108,51 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
; RV64IM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64IM-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mv s0, a3
-; RV64IM-NEXT: andi a3, a1, 2
-; RV64IM-NEXT: andi a4, a1, 1
-; RV64IM-NEXT: andi a5, a1, 4
-; RV64IM-NEXT: andi a6, a1, 8
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: xor a3, a4, a3
-; RV64IM-NEXT: andi a4, a1, 16
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: mul a6, a0, a6
-; RV64IM-NEXT: xor a5, a5, a6
-; RV64IM-NEXT: andi a6, a1, 32
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: slli a3, a1, 62
+; RV64IM-NEXT: slli a4, a0, 1
+; RV64IM-NEXT: slli a5, a1, 61
+; RV64IM-NEXT: slli a6, a0, 2
+; RV64IM-NEXT: slli a7, a1, 60
+; RV64IM-NEXT: slli t0, a0, 3
+; RV64IM-NEXT: slli t1, a1, 59
+; RV64IM-NEXT: srli a3, a3, 63
+; RV64IM-NEXT: neg a3, a3
+; RV64IM-NEXT: and a3, a3, a4
+; RV64IM-NEXT: slli a4, a0, 4
+; RV64IM-NEXT: srli a5, a5, 63
+; RV64IM-NEXT: neg a5, a5
+; RV64IM-NEXT: and a5, a5, a6
+; RV64IM-NEXT: slli a6, a1, 58
+; RV64IM-NEXT: srli a7, a7, 63
+; RV64IM-NEXT: neg a7, a7
+; RV64IM-NEXT: and a7, a7, t0
+; RV64IM-NEXT: slli t0, a0, 5
+; RV64IM-NEXT: srli t1, t1, 63
+; RV64IM-NEXT: neg t1, t1
+; RV64IM-NEXT: and a4, t1, a4
+; RV64IM-NEXT: slli t1, a1, 57
+; RV64IM-NEXT: srli a6, a6, 63
+; RV64IM-NEXT: neg a6, a6
+; RV64IM-NEXT: and a6, a6, t0
+; RV64IM-NEXT: slli t0, a0, 6
+; RV64IM-NEXT: srli t1, t1, 63
+; RV64IM-NEXT: neg t1, t1
+; RV64IM-NEXT: and t0, t1, t0
+; RV64IM-NEXT: slli t1, a1, 63
+; RV64IM-NEXT: srai t1, t1, 63
+; RV64IM-NEXT: and t1, t1, a0
+; RV64IM-NEXT: xor a3, t1, a3
+; RV64IM-NEXT: xor a5, a5, a7
; RV64IM-NEXT: xor a4, a4, a6
+; RV64IM-NEXT: slli a1, a1, 56
+; RV64IM-NEXT: slli a0, a0, 7
+; RV64IM-NEXT: srli a1, a1, 63
+; RV64IM-NEXT: neg a1, a1
; RV64IM-NEXT: xor a3, a3, a5
-; RV64IM-NEXT: andi a5, a1, 64
-; RV64IM-NEXT: andi a1, a1, -128
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: xor a4, a4, a5
+; RV64IM-NEXT: xor a4, a4, t0
; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: mul s1, a0, a1
-; RV64IM-NEXT: xor s1, a3, s1
+; RV64IM-NEXT: and a0, a1, a0
+; RV64IM-NEXT: xor s1, a3, a0
; RV64IM-NEXT: sb s1, 0(a2)
; RV64IM-NEXT: mv a0, s1
; RV64IM-NEXT: call use
@@ -5908,29 +7170,44 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
; RV32IMZBS-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: mv s0, a3
-; RV32IMZBS-NEXT: andi a3, a1, 2
-; RV32IMZBS-NEXT: andi a4, a1, 1
-; RV32IMZBS-NEXT: andi a5, a1, 4
-; RV32IMZBS-NEXT: andi a6, a1, 8
-; RV32IMZBS-NEXT: mul a3, a0, a3
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: andi a4, a1, 16
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: mul a6, a0, a6
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: andi a6, a1, 32
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: mul a6, a0, a6
+; RV32IMZBS-NEXT: bexti a3, a1, 1
+; RV32IMZBS-NEXT: slli a4, a0, 1
+; RV32IMZBS-NEXT: bexti a5, a1, 2
+; RV32IMZBS-NEXT: slli a6, a0, 2
+; RV32IMZBS-NEXT: bexti a7, a1, 3
+; RV32IMZBS-NEXT: slli t0, a0, 3
+; RV32IMZBS-NEXT: bexti t1, a1, 4
+; RV32IMZBS-NEXT: neg a3, a3
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: slli a4, a0, 4
+; RV32IMZBS-NEXT: neg a5, a5
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: bexti a6, a1, 5
+; RV32IMZBS-NEXT: neg a7, a7
+; RV32IMZBS-NEXT: and a7, a7, t0
+; RV32IMZBS-NEXT: slli t0, a0, 5
+; RV32IMZBS-NEXT: neg t1, t1
+; RV32IMZBS-NEXT: and a4, t1, a4
+; RV32IMZBS-NEXT: bexti t1, a1, 6
+; RV32IMZBS-NEXT: neg a6, a6
+; RV32IMZBS-NEXT: and a6, a6, t0
+; RV32IMZBS-NEXT: slli t0, a0, 6
+; RV32IMZBS-NEXT: neg t1, t1
+; RV32IMZBS-NEXT: and t0, t1, t0
+; RV32IMZBS-NEXT: slli t1, a1, 31
+; RV32IMZBS-NEXT: srai t1, t1, 31
+; RV32IMZBS-NEXT: and t1, t1, a0
+; RV32IMZBS-NEXT: xor a3, t1, a3
+; RV32IMZBS-NEXT: xor a5, a5, a7
; RV32IMZBS-NEXT: xor a4, a4, a6
+; RV32IMZBS-NEXT: bexti a1, a1, 7
+; RV32IMZBS-NEXT: slli a0, a0, 7
+; RV32IMZBS-NEXT: neg a1, a1
; RV32IMZBS-NEXT: xor a3, a3, a5
-; RV32IMZBS-NEXT: andi a5, a1, 64
-; RV32IMZBS-NEXT: andi a1, a1, -128
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: xor a4, a4, t0
; RV32IMZBS-NEXT: xor a3, a3, a4
-; RV32IMZBS-NEXT: mul s1, a0, a1
-; RV32IMZBS-NEXT: xor s1, a3, s1
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: xor s1, a3, a0
; RV32IMZBS-NEXT: sb s1, 0(a2)
; RV32IMZBS-NEXT: mv a0, s1
; RV32IMZBS-NEXT: call use
@@ -5948,29 +7225,44 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
; RV64IMZBS-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64IMZBS-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64IMZBS-NEXT: mv s0, a3
-; RV64IMZBS-NEXT: andi a3, a1, 2
-; RV64IMZBS-NEXT: andi a4, a1, 1
-; RV64IMZBS-NEXT: andi a5, a1, 4
-; RV64IMZBS-NEXT: andi a6, a1, 8
-; RV64IMZBS-NEXT: mul a3, a0, a3
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: andi a4, a1, 16
-; RV64IMZBS-NEXT: mul a5, a0, a5
-; RV64IMZBS-NEXT: mul a6, a0, a6
-; RV64IMZBS-NEXT: xor a5, a5, a6
-; RV64IMZBS-NEXT: andi a6, a1, 32
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: mul a6, a0, a6
+; RV64IMZBS-NEXT: bexti a3, a1, 1
+; RV64IMZBS-NEXT: slli a4, a0, 1
+; RV64IMZBS-NEXT: bexti a5, a1, 2
+; RV64IMZBS-NEXT: slli a6, a0, 2
+; RV64IMZBS-NEXT: bexti a7, a1, 3
+; RV64IMZBS-NEXT: slli t0, a0, 3
+; RV64IMZBS-NEXT: bexti t1, a1, 4
+; RV64IMZBS-NEXT: neg a3, a3
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: slli a4, a0, 4
+; RV64IMZBS-NEXT: neg a5, a5
+; RV64IMZBS-NEXT: and a5, a5, a6
+; RV64IMZBS-NEXT: bexti a6, a1, 5
+; RV64IMZBS-NEXT: neg a7, a7
+; RV64IMZBS-NEXT: and a7, a7, t0
+; RV64IMZBS-NEXT: slli t0, a0, 5
+; RV64IMZBS-NEXT: neg t1, t1
+; RV64IMZBS-NEXT: and a4, t1, a4
+; RV64IMZBS-NEXT: bexti t1, a1, 6
+; RV64IMZBS-NEXT: neg a6, a6
+; RV64IMZBS-NEXT: and a6, a6, t0
+; RV64IMZBS-NEXT: slli t0, a0, 6
+; RV64IMZBS-NEXT: neg t1, t1
+; RV64IMZBS-NEXT: and t0, t1, t0
+; RV64IMZBS-NEXT: slli t1, a1, 63
+; RV64IMZBS-NEXT: srai t1, t1, 63
+; RV64IMZBS-NEXT: and t1, t1, a0
+; RV64IMZBS-NEXT: xor a3, t1, a3
+; RV64IMZBS-NEXT: xor a5, a5, a7
; RV64IMZBS-NEXT: xor a4, a4, a6
+; RV64IMZBS-NEXT: bexti a1, a1, 7
+; RV64IMZBS-NEXT: slli a0, a0, 7
+; RV64IMZBS-NEXT: neg a1, a1
; RV64IMZBS-NEXT: xor a3, a3, a5
-; RV64IMZBS-NEXT: andi a5, a1, 64
-; RV64IMZBS-NEXT: andi a1, a1, -128
-; RV64IMZBS-NEXT: mul a5, a0, a5
-; RV64IMZBS-NEXT: xor a4, a4, a5
+; RV64IMZBS-NEXT: xor a4, a4, t0
; RV64IMZBS-NEXT: xor a3, a3, a4
-; RV64IMZBS-NEXT: mul s1, a0, a1
-; RV64IMZBS-NEXT: xor s1, a3, s1
+; RV64IMZBS-NEXT: and a0, a1, a0
+; RV64IMZBS-NEXT: xor s1, a3, a0
; RV64IMZBS-NEXT: sb s1, 0(a2)
; RV64IMZBS-NEXT: mv a0, s1
; RV64IMZBS-NEXT: call use
@@ -6091,63 +7383,195 @@ define void @neg_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; RV64I-NEXT: sb a0, 0(a3)
; RV64I-NEXT: ret
;
-; CHECK-M-LABEL: neg_commutative_clmul_i8:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: andi a4, a1, 2
-; CHECK-M-NEXT: andi a5, a1, 1
-; CHECK-M-NEXT: andi a6, a1, 4
-; CHECK-M-NEXT: andi a7, a1, 8
-; CHECK-M-NEXT: andi t0, a1, 16
-; CHECK-M-NEXT: andi t1, a1, 32
-; CHECK-M-NEXT: andi t2, a1, 64
-; CHECK-M-NEXT: andi a1, a1, -128
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: mul a6, a0, a6
-; CHECK-M-NEXT: mul a7, a0, a7
-; CHECK-M-NEXT: mul t0, a0, t0
-; CHECK-M-NEXT: mul t1, a0, t1
-; CHECK-M-NEXT: mul t2, a0, t2
-; CHECK-M-NEXT: mul a0, a0, a1
-; CHECK-M-NEXT: xor a4, a5, a4
-; CHECK-M-NEXT: xor a1, a6, a7
-; CHECK-M-NEXT: xor a5, t0, t1
-; CHECK-M-NEXT: xor a1, a4, a1
-; CHECK-M-NEXT: xor a4, a5, t2
-; CHECK-M-NEXT: xor a1, a1, a4
-; CHECK-M-NEXT: xor a0, a1, a0
-; CHECK-M-NEXT: sb a0, 0(a2)
-; CHECK-M-NEXT: sb a0, 0(a3)
-; CHECK-M-NEXT: ret
+; RV32IM-LABEL: neg_commutative_clmul_i8:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: slli a4, a1, 30
+; RV32IM-NEXT: slli a5, a0, 1
+; RV32IM-NEXT: slli a6, a1, 29
+; RV32IM-NEXT: slli a7, a0, 2
+; RV32IM-NEXT: slli t0, a1, 28
+; RV32IM-NEXT: slli t1, a0, 3
+; RV32IM-NEXT: slli t2, a1, 27
+; RV32IM-NEXT: srli a4, a4, 31
+; RV32IM-NEXT: neg a4, a4
+; RV32IM-NEXT: and a4, a4, a5
+; RV32IM-NEXT: slli a5, a0, 4
+; RV32IM-NEXT: srli a6, a6, 31
+; RV32IM-NEXT: neg a6, a6
+; RV32IM-NEXT: and a6, a6, a7
+; RV32IM-NEXT: slli a7, a1, 26
+; RV32IM-NEXT: srli t0, t0, 31
+; RV32IM-NEXT: neg t0, t0
+; RV32IM-NEXT: and t0, t0, t1
+; RV32IM-NEXT: slli t1, a0, 5
+; RV32IM-NEXT: srli t2, t2, 31
+; RV32IM-NEXT: neg t2, t2
+; RV32IM-NEXT: and a5, t2, a5
+; RV32IM-NEXT: slli t2, a1, 25
+; RV32IM-NEXT: srli a7, a7, 31
+; RV32IM-NEXT: neg a7, a7
+; RV32IM-NEXT: and a7, a7, t1
+; RV32IM-NEXT: slli t1, a0, 6
+; RV32IM-NEXT: srli t2, t2, 31
+; RV32IM-NEXT: neg t2, t2
+; RV32IM-NEXT: and t1, t2, t1
+; RV32IM-NEXT: slli t2, a1, 31
+; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: srai t2, t2, 31
+; RV32IM-NEXT: and t2, t2, a0
+; RV32IM-NEXT: slli a0, a0, 7
+; RV32IM-NEXT: srli a1, a1, 31
+; RV32IM-NEXT: neg a1, a1
+; RV32IM-NEXT: and a0, a1, a0
+; RV32IM-NEXT: xor a1, t2, a4
+; RV32IM-NEXT: xor a4, a6, t0
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: xor a1, a1, a4
+; RV32IM-NEXT: xor a4, a5, t1
+; RV32IM-NEXT: xor a1, a1, a4
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sb a0, 0(a2)
+; RV32IM-NEXT: sb a0, 0(a3)
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: neg_commutative_clmul_i8:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: slli a4, a1, 62
+; RV64IM-NEXT: slli a5, a0, 1
+; RV64IM-NEXT: slli a6, a1, 61
+; RV64IM-NEXT: slli a7, a0, 2
+; RV64IM-NEXT: slli t0, a1, 60
+; RV64IM-NEXT: slli t1, a0, 3
+; RV64IM-NEXT: slli t2, a1, 59
+; RV64IM-NEXT: srli a4, a4, 63
+; RV64IM-NEXT: neg a4, a4
+; RV64IM-NEXT: and a4, a4, a5
+; RV64IM-NEXT: slli a5, a0, 4
+; RV64IM-NEXT: srli a6, a6, 63
+; RV64IM-NEXT: neg a6, a6
+; RV64IM-NEXT: and a6, a6, a7
+; RV64IM-NEXT: slli a7, a1, 58
+; RV64IM-NEXT: srli t0, t0, 63
+; RV64IM-NEXT: neg t0, t0
+; RV64IM-NEXT: and t0, t0, t1
+; RV64IM-NEXT: slli t1, a0, 5
+; RV64IM-NEXT: srli t2, t2, 63
+; RV64IM-NEXT: neg t2, t2
+; RV64IM-NEXT: and a5, t2, a5
+; RV64IM-NEXT: slli t2, a1, 57
+; RV64IM-NEXT: srli a7, a7, 63
+; RV64IM-NEXT: neg a7, a7
+; RV64IM-NEXT: and a7, a7, t1
+; RV64IM-NEXT: slli t1, a0, 6
+; RV64IM-NEXT: srli t2, t2, 63
+; RV64IM-NEXT: neg t2, t2
+; RV64IM-NEXT: and t1, t2, t1
+; RV64IM-NEXT: slli t2, a1, 63
+; RV64IM-NEXT: slli a1, a1, 56
+; RV64IM-NEXT: srai t2, t2, 63
+; RV64IM-NEXT: and t2, t2, a0
+; RV64IM-NEXT: slli a0, a0, 7
+; RV64IM-NEXT: srli a1, a1, 63
+; RV64IM-NEXT: neg a1, a1
+; RV64IM-NEXT: and a0, a1, a0
+; RV64IM-NEXT: xor a1, t2, a4
+; RV64IM-NEXT: xor a4, a6, t0
+; RV64IM-NEXT: xor a5, a5, a7
+; RV64IM-NEXT: xor a1, a1, a4
+; RV64IM-NEXT: xor a4, a5, t1
+; RV64IM-NEXT: xor a1, a1, a4
+; RV64IM-NEXT: xor a0, a1, a0
+; RV64IM-NEXT: sb a0, 0(a2)
+; RV64IM-NEXT: sb a0, 0(a3)
+; RV64IM-NEXT: ret
+;
+; RV32IMZBS-LABEL: neg_commutative_clmul_i8:
+; RV32IMZBS: # %bb.0:
+; RV32IMZBS-NEXT: bexti a4, a1, 1
+; RV32IMZBS-NEXT: slli a5, a0, 1
+; RV32IMZBS-NEXT: bexti a6, a1, 2
+; RV32IMZBS-NEXT: slli a7, a0, 2
+; RV32IMZBS-NEXT: bexti t0, a1, 3
+; RV32IMZBS-NEXT: slli t1, a0, 3
+; RV32IMZBS-NEXT: bexti t2, a1, 4
+; RV32IMZBS-NEXT: neg a4, a4
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: slli a5, a0, 4
+; RV32IMZBS-NEXT: neg a6, a6
+; RV32IMZBS-NEXT: and a6, a6, a7
+; RV32IMZBS-NEXT: bexti a7, a1, 5
+; RV32IMZBS-NEXT: neg t0, t0
+; RV32IMZBS-NEXT: and t0, t0, t1
+; RV32IMZBS-NEXT: slli t1, a0, 5
+; RV32IMZBS-NEXT: neg t2, t2
+; RV32IMZBS-NEXT: and a5, t2, a5
+; RV32IMZBS-NEXT: bexti t2, a1, 6
+; RV32IMZBS-NEXT: neg a7, a7
+; RV32IMZBS-NEXT: and a7, a7, t1
+; RV32IMZBS-NEXT: slli t1, a0, 6
+; RV32IMZBS-NEXT: neg t2, t2
+; RV32IMZBS-NEXT: and t1, t2, t1
+; RV32IMZBS-NEXT: slli t2, a1, 31
+; RV32IMZBS-NEXT: bexti a1, a1, 7
+; RV32IMZBS-NEXT: srai t2, t2, 31
+; RV32IMZBS-NEXT: and t2, t2, a0
+; RV32IMZBS-NEXT: slli a0, a0, 7
+; RV32IMZBS-NEXT: neg a1, a1
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: xor a1, t2, a4
+; RV32IMZBS-NEXT: xor a4, a6, t0
+; RV32IMZBS-NEXT: xor a5, a5, a7
+; RV32IMZBS-NEXT: xor a1, a1, a4
+; RV32IMZBS-NEXT: xor a4, a5, t1
+; RV32IMZBS-NEXT: xor a1, a1, a4
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: sb a0, 0(a2)
+; RV32IMZBS-NEXT: sb a0, 0(a3)
+; RV32IMZBS-NEXT: ret
;
-; CHECK-ZBS-LABEL: neg_commutative_clmul_i8:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: andi a4, a1, 2
-; CHECK-ZBS-NEXT: andi a5, a1, 1
-; CHECK-ZBS-NEXT: andi a6, a1, 4
-; CHECK-ZBS-NEXT: andi a7, a1, 8
-; CHECK-ZBS-NEXT: andi t0, a1, 16
-; CHECK-ZBS-NEXT: andi t1, a1, 32
-; CHECK-ZBS-NEXT: andi t2, a1, 64
-; CHECK-ZBS-NEXT: andi a1, a1, -128
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: mul a5, a0, a5
-; CHECK-ZBS-NEXT: mul a6, a0, a6
-; CHECK-ZBS-NEXT: mul a7, a0, a7
-; CHECK-ZBS-NEXT: mul t0, a0, t0
-; CHECK-ZBS-NEXT: mul t1, a0, t1
-; CHECK-ZBS-NEXT: mul t2, a0, t2
-; CHECK-ZBS-NEXT: mul a0, a0, a1
-; CHECK-ZBS-NEXT: xor a4, a5, a4
-; CHECK-ZBS-NEXT: xor a1, a6, a7
-; CHECK-ZBS-NEXT: xor a5, t0, t1
-; CHECK-ZBS-NEXT: xor a1, a4, a1
-; CHECK-ZBS-NEXT: xor a4, a5, t2
-; CHECK-ZBS-NEXT: xor a1, a1, a4
-; CHECK-ZBS-NEXT: xor a0, a1, a0
-; CHECK-ZBS-NEXT: sb a0, 0(a2)
-; CHECK-ZBS-NEXT: sb a0, 0(a3)
-; CHECK-ZBS-NEXT: ret
+; RV64IMZBS-LABEL: neg_commutative_clmul_i8:
+; RV64IMZBS: # %bb.0:
+; RV64IMZBS-NEXT: bexti a4, a1, 1
+; RV64IMZBS-NEXT: slli a5, a0, 1
+; RV64IMZBS-NEXT: bexti a6, a1, 2
+; RV64IMZBS-NEXT: slli a7, a0, 2
+; RV64IMZBS-NEXT: bexti t0, a1, 3
+; RV64IMZBS-NEXT: slli t1, a0, 3
+; RV64IMZBS-NEXT: bexti t2, a1, 4
+; RV64IMZBS-NEXT: neg a4, a4
+; RV64IMZBS-NEXT: and a4, a4, a5
+; RV64IMZBS-NEXT: slli a5, a0, 4
+; RV64IMZBS-NEXT: neg a6, a6
+; RV64IMZBS-NEXT: and a6, a6, a7
+; RV64IMZBS-NEXT: bexti a7, a1, 5
+; RV64IMZBS-NEXT: neg t0, t0
+; RV64IMZBS-NEXT: and t0, t0, t1
+; RV64IMZBS-NEXT: slli t1, a0, 5
+; RV64IMZBS-NEXT: neg t2, t2
+; RV64IMZBS-NEXT: and a5, t2, a5
+; RV64IMZBS-NEXT: bexti t2, a1, 6
+; RV64IMZBS-NEXT: neg a7, a7
+; RV64IMZBS-NEXT: and a7, a7, t1
+; RV64IMZBS-NEXT: slli t1, a0, 6
+; RV64IMZBS-NEXT: neg t2, t2
+; RV64IMZBS-NEXT: and t1, t2, t1
+; RV64IMZBS-NEXT: slli t2, a1, 63
+; RV64IMZBS-NEXT: bexti a1, a1, 7
+; RV64IMZBS-NEXT: srai t2, t2, 63
+; RV64IMZBS-NEXT: and t2, t2, a0
+; RV64IMZBS-NEXT: slli a0, a0, 7
+; RV64IMZBS-NEXT: neg a1, a1
+; RV64IMZBS-NEXT: and a0, a1, a0
+; RV64IMZBS-NEXT: xor a1, t2, a4
+; RV64IMZBS-NEXT: xor a4, a6, t0
+; RV64IMZBS-NEXT: xor a5, a5, a7
+; RV64IMZBS-NEXT: xor a1, a1, a4
+; RV64IMZBS-NEXT: xor a4, a5, t1
+; RV64IMZBS-NEXT: xor a1, a1, a4
+; RV64IMZBS-NEXT: xor a0, a1, a0
+; RV64IMZBS-NEXT: sb a0, 0(a2)
+; RV64IMZBS-NEXT: sb a0, 0(a3)
+; RV64IMZBS-NEXT: ret
%xy = call i8 @llvm.clmul.i8(i8 %x, i8 %y)
store i8 %xy, ptr %p0
store i8 %xy, ptr %p1
@@ -9608,2713 +11032,2695 @@ define void @commutative_clmul_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %p
;
; RV32IM-LABEL: commutative_clmul_v2i64:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -784
-; RV32IM-NEXT: sw ra, 780(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s0, 776(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s1, 772(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s2, 768(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s3, 764(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s4, 760(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s5, 756(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s6, 752(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s7, 748(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s8, 744(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s9, 740(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s10, 736(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s11, 732(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw a3, 684(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw a2, 680(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 0(a1)
-; RV32IM-NEXT: lw a5, 4(a1)
-; RV32IM-NEXT: lw a2, 8(a1)
-; RV32IM-NEXT: sw a2, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 12(a1)
-; RV32IM-NEXT: sw a1, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 0(a0)
-; RV32IM-NEXT: lw a2, 4(a0)
-; RV32IM-NEXT: lw s10, 8(a0)
-; RV32IM-NEXT: lw a0, 12(a0)
-; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 16
+; RV32IM-NEXT: addi sp, sp, -688
+; RV32IM-NEXT: sw ra, 684(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s0, 680(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 676(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s2, 672(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s3, 668(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s4, 664(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s5, 660(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s6, 656(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s7, 652(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s8, 648(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s9, 644(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s10, 640(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s11, 636(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a3, 632(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a2, 628(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s2, 0(a1)
+; RV32IM-NEXT: lw t6, 4(a1)
+; RV32IM-NEXT: lw t5, 8(a1)
+; RV32IM-NEXT: lw t2, 12(a1)
+; RV32IM-NEXT: lw s0, 0(a0)
+; RV32IM-NEXT: lw s1, 4(a0)
+; RV32IM-NEXT: lw t3, 8(a0)
+; RV32IM-NEXT: lw t4, 12(a0)
+; RV32IM-NEXT: lui a1, 16
; RV32IM-NEXT: li a0, 1
-; RV32IM-NEXT: lui s6, 1
-; RV32IM-NEXT: addi t0, a3, -256
-; RV32IM-NEXT: sw t0, 728(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a6, a1, 8
-; RV32IM-NEXT: srli a7, a1, 24
-; RV32IM-NEXT: and a6, a6, t0
-; RV32IM-NEXT: or a6, a6, a7
-; RV32IM-NEXT: sw a6, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli a0, a0, 11
-; RV32IM-NEXT: andi a3, a5, 2
-; RV32IM-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 1
-; RV32IM-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 4
-; RV32IM-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 8
-; RV32IM-NEXT: sw a3, 628(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 16
-; RV32IM-NEXT: sw a3, 620(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 32
-; RV32IM-NEXT: sw a3, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 64
-; RV32IM-NEXT: sw a3, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 128
-; RV32IM-NEXT: sw a3, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 256
-; RV32IM-NEXT: sw a3, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 512
-; RV32IM-NEXT: sw a3, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 1024
-; RV32IM-NEXT: sw a3, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a5, a0
-; RV32IM-NEXT: sw a3, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a5, s6
-; RV32IM-NEXT: sw a3, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s4, 2
-; RV32IM-NEXT: and a3, a5, s4
-; RV32IM-NEXT: sw a3, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s3, 4
-; RV32IM-NEXT: and a3, a5, s3
-; RV32IM-NEXT: sw a3, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s2, 8
-; RV32IM-NEXT: and a3, a5, s2
-; RV32IM-NEXT: sw a3, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t6, 16
-; RV32IM-NEXT: and a3, a5, t6
-; RV32IM-NEXT: sw a3, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s1, 32
-; RV32IM-NEXT: and a3, a5, s1
-; RV32IM-NEXT: sw a3, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t5, 64
-; RV32IM-NEXT: and a3, a5, t5
-; RV32IM-NEXT: sw a3, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t4, 128
-; RV32IM-NEXT: and a3, a5, t4
-; RV32IM-NEXT: sw a3, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t3, 256
-; RV32IM-NEXT: and a3, a5, t3
-; RV32IM-NEXT: sw a3, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t1, 512
-; RV32IM-NEXT: and a3, a5, t1
-; RV32IM-NEXT: sw a3, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a6, 1024
-; RV32IM-NEXT: and a3, a5, a6
-; RV32IM-NEXT: sw a3, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a7, 2048
-; RV32IM-NEXT: and a3, a5, a7
-; RV32IM-NEXT: sw a3, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 4096
-; RV32IM-NEXT: and a3, a5, a3
-; RV32IM-NEXT: sw a3, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s11, 8192
-; RV32IM-NEXT: and a3, a5, s11
-; RV32IM-NEXT: sw a3, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s0, 16384
-; RV32IM-NEXT: and a3, a5, s0
-; RV32IM-NEXT: sw a3, 372(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t2, 32768
-; RV32IM-NEXT: and a3, a5, t2
-; RV32IM-NEXT: sw a3, 368(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t0, 65536
-; RV32IM-NEXT: and a3, a5, t0
-; RV32IM-NEXT: sw a3, 364(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 131072
-; RV32IM-NEXT: and a3, a5, a3
-; RV32IM-NEXT: sw a3, 360(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui ra, 262144
-; RV32IM-NEXT: and a3, a5, ra
-; RV32IM-NEXT: sw a3, 356(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 524288
-; RV32IM-NEXT: and a5, a5, a3
-; RV32IM-NEXT: sw a5, 352(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 2
-; RV32IM-NEXT: sw a5, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 1
-; RV32IM-NEXT: sw a5, 688(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 4
-; RV32IM-NEXT: sw a5, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 8
-; RV32IM-NEXT: sw a5, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 16
-; RV32IM-NEXT: sw a5, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 32
-; RV32IM-NEXT: sw a5, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 64
-; RV32IM-NEXT: sw a5, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s9, a4, 128
-; RV32IM-NEXT: sw s9, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 256
-; RV32IM-NEXT: sw a5, 348(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s8, a4, 512
-; RV32IM-NEXT: sw s8, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s7, a4, 1024
-; RV32IM-NEXT: sw s7, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s5, a4, a0
-; RV32IM-NEXT: sw s5, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s6, a4, s6
-; RV32IM-NEXT: and s4, a4, s4
-; RV32IM-NEXT: sw s4, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a4, s3
-; RV32IM-NEXT: sw s3, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s2, a4, s2
-; RV32IM-NEXT: sw s2, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t6, a4, t6
-; RV32IM-NEXT: sw t6, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s1, a4, s1
-; RV32IM-NEXT: and t5, a4, t5
-; RV32IM-NEXT: sw t5, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t4, a4, t4
-; RV32IM-NEXT: sw t4, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t3, a4, t3
-; RV32IM-NEXT: sw t3, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t1, a4, t1
-; RV32IM-NEXT: sw t1, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a6, a4, a6
-; RV32IM-NEXT: sw a6, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a7, a4, a7
-; RV32IM-NEXT: lui a5, 4096
-; RV32IM-NEXT: and a5, a4, a5
-; RV32IM-NEXT: sw a5, 464(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s11, a4, s11
-; RV32IM-NEXT: sw s11, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s0, a4, s0
-; RV32IM-NEXT: and t2, a4, t2
-; RV32IM-NEXT: and t0, a4, t0
-; RV32IM-NEXT: lui a0, 131072
-; RV32IM-NEXT: and a0, a4, a0
-; RV32IM-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and ra, a4, ra
-; RV32IM-NEXT: and a3, a4, a3
-; RV32IM-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
+; RV32IM-NEXT: lui ra, 128
+; RV32IM-NEXT: addi t1, a1, -256
+; RV32IM-NEXT: slli s3, a0, 11
+; RV32IM-NEXT: srli a5, s0, 8
+; RV32IM-NEXT: srli a6, s0, 24
+; RV32IM-NEXT: and a3, s0, t1
+; RV32IM-NEXT: slli a0, s0, 24
+; RV32IM-NEXT: srli a4, s2, 8
+; RV32IM-NEXT: srli a1, s2, 24
+; RV32IM-NEXT: and a7, s2, t1
+; RV32IM-NEXT: slli a2, s2, 24
+; RV32IM-NEXT: and s6, s2, s3
+; RV32IM-NEXT: sw s6, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s4, 1
+; RV32IM-NEXT: and t0, s2, s4
+; RV32IM-NEXT: lui s5, 64
+; RV32IM-NEXT: and s7, s2, s5
+; RV32IM-NEXT: sw s7, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: or a5, a5, a6
+; RV32IM-NEXT: sw a5, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, s2, ra
+; RV32IM-NEXT: sw a5, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a3, a3, 8
+; RV32IM-NEXT: and a4, a4, t1
+; RV32IM-NEXT: slli a7, a7, 8
+; RV32IM-NEXT: or a0, a0, a3
+; RV32IM-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, s6
+; RV32IM-NEXT: or a1, a4, a1
+; RV32IM-NEXT: sw a1, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, s1, t0
+; RV32IM-NEXT: or a2, a2, a7
+; RV32IM-NEXT: sw a2, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s1, s7
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, a5
+; RV32IM-NEXT: xor a0, a2, a0
+; RV32IM-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 32768
+; RV32IM-NEXT: lui a2, 16384
+; RV32IM-NEXT: and s8, s2, a2
+; RV32IM-NEXT: and s6, s2, a0
+; RV32IM-NEXT: lui s7, 32768
+; RV32IM-NEXT: mul a0, s1, s8
+; RV32IM-NEXT: mul a1, s1, s6
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s9
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s9, 348(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, s9
-; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s8
+; RV32IM-NEXT: and a0, t6, s3
+; RV32IM-NEXT: mul a0, s0, a0
+; RV32IM-NEXT: and a1, t6, s4
+; RV32IM-NEXT: mul a1, s0, a1
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s7
-; RV32IM-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s5
-; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s6
-; RV32IM-NEXT: mv s5, s6
-; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s4
+; RV32IM-NEXT: and a0, t6, s5
+; RV32IM-NEXT: mul a0, s0, a0
+; RV32IM-NEXT: and a1, t6, ra
+; RV32IM-NEXT: mul a1, s0, a1
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s3
-; RV32IM-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s2
-; RV32IM-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t6
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s1
-; RV32IM-NEXT: mv t6, s1
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t5
+; RV32IM-NEXT: and a0, t6, a2
+; RV32IM-NEXT: mul a0, s0, a0
+; RV32IM-NEXT: and a1, t6, s7
+; RV32IM-NEXT: mul a1, s0, a1
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t4
-; RV32IM-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t3
-; RV32IM-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t1
-; RV32IM-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, a6
-; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, a7
-; RV32IM-NEXT: mv a6, a7
-; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, a5
+; RV32IM-NEXT: srli a0, t3, 8
+; RV32IM-NEXT: and a0, a0, t1
+; RV32IM-NEXT: srli a1, t3, 24
+; RV32IM-NEXT: or a0, a0, a1
; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s11
-; RV32IM-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s0
-; RV32IM-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t2
-; RV32IM-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t0
-; RV32IM-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a5, a2, a0
-; RV32IM-NEXT: mul t3, a2, ra
-; RV32IM-NEXT: mv a7, ra
-; RV32IM-NEXT: mul a0, a2, a3
-; RV32IM-NEXT: mv a2, a3
+; RV32IM-NEXT: and a0, t3, t1
+; RV32IM-NEXT: sw t1, 624(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a0, a0, 8
+; RV32IM-NEXT: slli a1, t3, 24
+; RV32IM-NEXT: or a0, a1, a0
; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a1, a0
-; RV32IM-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t4, a1, a0
-; RV32IM-NEXT: lw a0, 644(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t5, a1, a0
-; RV32IM-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s1, a1, a0
-; RV32IM-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s2, a1, a0
-; RV32IM-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s3, a1, a0
-; RV32IM-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: srli a0, t5, 8
+; RV32IM-NEXT: and a0, a0, t1
+; RV32IM-NEXT: srli a1, t5, 24
+; RV32IM-NEXT: or a0, a0, a1
; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s4, a1, a0
-; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s6, a1, a0
-; RV32IM-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: and a0, t5, t1
+; RV32IM-NEXT: slli a0, a0, 8
+; RV32IM-NEXT: slli a1, t5, 24
+; RV32IM-NEXT: or a0, a1, a0
; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: mv a7, s3
+; RV32IM-NEXT: sw s3, 620(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s9, t5, s3
+; RV32IM-NEXT: lui a6, 1
+; RV32IM-NEXT: and t1, t5, a6
+; RV32IM-NEXT: mul a0, t4, s9
+; RV32IM-NEXT: mul a1, t4, t1
+; RV32IM-NEXT: xor s11, a0, a1
+; RV32IM-NEXT: lui a5, 64
+; RV32IM-NEXT: and s10, t5, a5
+; RV32IM-NEXT: and a1, t5, ra
+; RV32IM-NEXT: mul a0, t4, s10
+; RV32IM-NEXT: mul a2, t4, a1
+; RV32IM-NEXT: xor s4, a0, a2
+; RV32IM-NEXT: lui a4, 16384
+; RV32IM-NEXT: and s5, t5, a4
+; RV32IM-NEXT: and a0, t5, s7
+; RV32IM-NEXT: mul a2, t4, s5
+; RV32IM-NEXT: mul a3, t4, a0
+; RV32IM-NEXT: xor s3, a2, a3
+; RV32IM-NEXT: and a2, t2, a7
+; RV32IM-NEXT: mul a2, t3, a2
+; RV32IM-NEXT: and a3, t2, a6
+; RV32IM-NEXT: mul a3, t3, a3
+; RV32IM-NEXT: xor a3, a2, a3
+; RV32IM-NEXT: and a2, t2, a5
+; RV32IM-NEXT: mul a2, t3, a2
+; RV32IM-NEXT: and ra, t2, ra
+; RV32IM-NEXT: mul ra, t3, ra
+; RV32IM-NEXT: xor a2, a2, ra
+; RV32IM-NEXT: and ra, t2, a4
+; RV32IM-NEXT: mul ra, t3, ra
+; RV32IM-NEXT: and s7, t2, s7
+; RV32IM-NEXT: mul s7, t3, s7
+; RV32IM-NEXT: xor a7, ra, s7
+; RV32IM-NEXT: lw a4, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul s7, s0, a4
+; RV32IM-NEXT: mul t0, s0, t0
+; RV32IM-NEXT: xor a4, s7, t0
+; RV32IM-NEXT: sw a4, 380(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a4, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul t0, s0, a4
+; RV32IM-NEXT: lw a4, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a6, s0, a4
+; RV32IM-NEXT: xor a4, t0, a6
+; RV32IM-NEXT: sw a4, 364(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s0, s8
+; RV32IM-NEXT: mul a4, s0, s6
+; RV32IM-NEXT: xor a4, a5, a4
+; RV32IM-NEXT: sw a4, 332(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, t3, s9
+; RV32IM-NEXT: mul a5, t3, t1
+; RV32IM-NEXT: xor a4, a4, a5
+; RV32IM-NEXT: sw a4, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, t3, s10
+; RV32IM-NEXT: mul a1, t3, a1
+; RV32IM-NEXT: xor a1, a4, a1
+; RV32IM-NEXT: sw a1, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, t3, s5
+; RV32IM-NEXT: mul a0, t3, a0
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a0, a1, a0
; RV32IM-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s7, a1, a0
-; RV32IM-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s8, a1, a0
-; RV32IM-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t1, a1, a0
-; RV32IM-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s11, a1, a0
-; RV32IM-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a0, a1, a0
+; RV32IM-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 2
+; RV32IM-NEXT: and a0, s2, a0
+; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, a0
+; RV32IM-NEXT: lw a1, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a1, a0
; RV32IM-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul ra, a1, a0
-; RV32IM-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 380(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: lui a0, 256
+; RV32IM-NEXT: and a0, s2, a0
+; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t0, 256
+; RV32IM-NEXT: mul a0, s1, a0
+; RV32IM-NEXT: lw a5, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a5, a0
; RV32IM-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 368(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 364(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 360(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: lui a6, 65536
+; RV32IM-NEXT: and a0, s2, a6
+; RV32IM-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s1, a0
+; RV32IM-NEXT: lw t1, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, t1, a5
; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: lui t1, 2
+; RV32IM-NEXT: and a5, t6, t1
+; RV32IM-NEXT: mul a5, s0, a5
+; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a5
+; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, t6, t0
+; RV32IM-NEXT: mul a5, s0, a5
+; RV32IM-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a5
+; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, t6, a6
+; RV32IM-NEXT: lui t0, 65536
+; RV32IM-NEXT: mul a5, s0, a5
+; RV32IM-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a5
+; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a0, a1, a0
+; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a0, a1, a0
+; RV32IM-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, t5, t1
+; RV32IM-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, t4, a0
+; RV32IM-NEXT: xor a0, s11, a6
+; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 256
+; RV32IM-NEXT: and a0, t5, a5
+; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, t4, a0
+; RV32IM-NEXT: xor a0, s4, a6
+; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, t5, t0
+; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, t4, a0
+; RV32IM-NEXT: mv ra, t4
+; RV32IM-NEXT: xor a0, s3, a6
+; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a6, t2, t1
+; RV32IM-NEXT: mul a6, t3, a6
+; RV32IM-NEXT: xor a0, a3, a6
+; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, t2, a5
+; RV32IM-NEXT: mul a3, t3, a3
+; RV32IM-NEXT: xor a2, a2, a3
+; RV32IM-NEXT: sw a2, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, t2, t0
+; RV32IM-NEXT: mul a2, t3, a2
+; RV32IM-NEXT: xor a0, a7, a2
+; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 2
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t6, 2
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 1
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, s0, 1
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 4
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t6, 4
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 2
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, s0, 2
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 8
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t6, 8
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 3
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, s0, 3
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 16
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t6, 16
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 4
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, s0, 4
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 32
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t6, 32
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 5
+; RV32IM-NEXT: and a0, a2, a6
; RV32IM-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: slli a6, s0, 5
+; RV32IM-NEXT: and a0, a3, a6
; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s9
-; RV32IM-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s5
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 64
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t6, 64
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 6
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, s0, 6
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 128
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t6, 128
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 7
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, s0, 7
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 256
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t6, 256
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 8
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, s0, 8
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
; RV32IM-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: andi a2, s2, 512
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t6, 512
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 9
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, s0, 9
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s11, 4
+; RV32IM-NEXT: lui a0, 8
+; RV32IM-NEXT: lui a1, 32
+; RV32IM-NEXT: lui a2, 512
+; RV32IM-NEXT: lui a4, 1024
+; RV32IM-NEXT: lui t4, 2048
+; RV32IM-NEXT: lui s6, 4096
+; RV32IM-NEXT: lui s7, 8192
+; RV32IM-NEXT: lui s8, 131072
+; RV32IM-NEXT: lui s9, 262144
+; RV32IM-NEXT: lui s10, 524288
+; RV32IM-NEXT: andi a3, s2, 1
+; RV32IM-NEXT: andi a5, s2, 1024
+; RV32IM-NEXT: sw a5, 356(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a6, s2, s11
+; RV32IM-NEXT: and a7, s2, a0
+; RV32IM-NEXT: lui a5, 16
+; RV32IM-NEXT: and t0, s2, a5
+; RV32IM-NEXT: and t1, s2, a1
+; RV32IM-NEXT: and s3, s2, a2
+; RV32IM-NEXT: and s4, s2, a4
+; RV32IM-NEXT: and s5, s2, t4
+; RV32IM-NEXT: and s6, s2, s6
+; RV32IM-NEXT: and s7, s2, s7
+; RV32IM-NEXT: and s8, s2, s8
+; RV32IM-NEXT: and s9, s2, s9
+; RV32IM-NEXT: and s2, s2, s10
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: andi s10, t6, 1
+; RV32IM-NEXT: seqz s10, s10
+; RV32IM-NEXT: and s11, t6, s11
+; RV32IM-NEXT: mul s11, s0, s11
+; RV32IM-NEXT: sw s11, 292(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, t6, a0
+; RV32IM-NEXT: mul a0, s0, s11
+; RV32IM-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, t6, a5
+; RV32IM-NEXT: mul a0, s0, s11
; RV32IM-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: and s11, t6, a1
+; RV32IM-NEXT: mul a0, s0, s11
+; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, t6, a2
+; RV32IM-NEXT: mul a0, s0, s11
+; RV32IM-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, t6, a4
+; RV32IM-NEXT: mul a0, s0, s11
+; RV32IM-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, t6, t4
+; RV32IM-NEXT: mul a0, s0, s11
+; RV32IM-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 4096
+; RV32IM-NEXT: and s11, t6, a0
+; RV32IM-NEXT: mul a0, s0, s11
+; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 8192
+; RV32IM-NEXT: and s11, t6, a0
+; RV32IM-NEXT: mul a0, s0, s11
+; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 131072
+; RV32IM-NEXT: and s11, t6, a0
+; RV32IM-NEXT: mul a0, s0, s11
+; RV32IM-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 262144
+; RV32IM-NEXT: and s11, t6, a0
+; RV32IM-NEXT: mul a0, s0, s11
+; RV32IM-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 524288
+; RV32IM-NEXT: and s11, t6, a0
+; RV32IM-NEXT: mul a0, s0, s11
+; RV32IM-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a4, s0, a0
+; RV32IM-NEXT: mul a0, s1, a6
+; RV32IM-NEXT: sw a0, 264(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s0, a6
+; RV32IM-NEXT: sw a0, 288(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, a7
+; RV32IM-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s0, a7
+; RV32IM-NEXT: sw a0, 324(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, t0
; RV32IM-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t6
-; RV32IM-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: mul a0, s0, t0
+; RV32IM-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, t1
; RV32IM-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a6
-; RV32IM-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s0
-; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t2
-; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t0
-; RV32IM-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: mul a0, s0, t1
+; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a1, s0, a0
+; RV32IM-NEXT: mul a0, s1, s3
+; RV32IM-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s0, s3
+; RV32IM-NEXT: sw a0, 276(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, s4
+; RV32IM-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s0, s4
+; RV32IM-NEXT: sw a0, 320(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, s5
; RV32IM-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a7
-; RV32IM-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a2
-; RV32IM-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a0, a1, a2
-; RV32IM-NEXT: slli a1, a1, 24
-; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a0, a4, 8
-; RV32IM-NEXT: and a0, a0, a2
-; RV32IM-NEXT: srli a1, a4, 24
-; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, a4, a2
-; RV32IM-NEXT: slli a4, a4, 24
-; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or a0, a4, a0
-; RV32IM-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, a5, t3
+; RV32IM-NEXT: mul a0, s0, s5
+; RV32IM-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, s6
+; RV32IM-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s0, s6
+; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, s7
+; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s0, s7
; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t4, a3
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t5, s1
-; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, s2, s3
-; RV32IM-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, s4, s6
+; RV32IM-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a0, s0, a0
+; RV32IM-NEXT: mul a2, s1, s8
+; RV32IM-NEXT: sw a2, 252(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s0, s8
+; RV32IM-NEXT: sw a2, 268(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s1, s9
+; RV32IM-NEXT: sw a2, 280(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s0, s9
+; RV32IM-NEXT: sw a2, 304(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s1, s2
+; RV32IM-NEXT: sw a2, 336(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s0, s2
+; RV32IM-NEXT: sw a2, 384(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: addi s10, s10, -1
+; RV32IM-NEXT: slli a6, s1, 10
+; RV32IM-NEXT: and s1, a3, s1
+; RV32IM-NEXT: sw s1, 232(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, s10, s0
+; RV32IM-NEXT: sw a2, 240(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, a3, s0
+; RV32IM-NEXT: sw a3, 260(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli s0, s0, 10
+; RV32IM-NEXT: andi a3, t6, 1024
+; RV32IM-NEXT: lw a2, 356(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and a5, a2, a6
+; RV32IM-NEXT: sw a5, 308(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, a3, s0
+; RV32IM-NEXT: sw a3, 316(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, a2, s0
+; RV32IM-NEXT: sw a2, 356(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a4
+; RV32IM-NEXT: sw a2, 380(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 364(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: sw a1, 364(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a1, a0
; RV32IM-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, s7, s8
-; RV32IM-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t1, s11
-; RV32IM-NEXT: sw a0, 324(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, ra, a0
-; RV32IM-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a0, s10, 8
+; RV32IM-NEXT: andi a0, t5, 2
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t2, 2
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 1
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 212(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t3, 1
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
; RV32IM-NEXT: and a0, a0, a2
-; RV32IM-NEXT: srli a1, s10, 24
-; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s5, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: andi a0, s5, 2
-; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 1
-; RV32IM-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 4
-; RV32IM-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 8
-; RV32IM-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 16
-; RV32IM-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 32
-; RV32IM-NEXT: sw a0, 292(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 64
-; RV32IM-NEXT: sw a0, 280(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 128
-; RV32IM-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 256
-; RV32IM-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 512
-; RV32IM-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 1024
; RV32IM-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a1, s5, a0
-; RV32IM-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 1
-; RV32IM-NEXT: and a1, s5, a3
-; RV32IM-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t1, 2
-; RV32IM-NEXT: and a1, s5, t1
-; RV32IM-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t0, 4
-; RV32IM-NEXT: and a1, s5, t0
-; RV32IM-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t4, 8
-; RV32IM-NEXT: and a1, s5, t4
-; RV32IM-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s9, 16
-; RV32IM-NEXT: and a1, s5, s9
-; RV32IM-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t3, 32
-; RV32IM-NEXT: and a1, s5, t3
-; RV32IM-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t5, 64
-; RV32IM-NEXT: and a1, s5, t5
-; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t6, 128
-; RV32IM-NEXT: and a1, s5, t6
-; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s1, 256
-; RV32IM-NEXT: and a1, s5, s1
+; RV32IM-NEXT: andi a0, t5, 4
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t2, 4
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 2
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 200(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t3, 2
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 204(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 228(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 8
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t2, 8
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 3
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 160(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t3, 3
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 176(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 16
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t2, 16
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 4
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t3, 4
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 148(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 32
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t2, 32
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 5
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t3, 5
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 156(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 64
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t2, 64
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 6
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 180(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t3, 6
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 196(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 216(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 128
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t2, 128
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 7
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t3, 7
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 256
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t2, 256
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 8
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t3, 8
+; RV32IM-NEXT: and a1, a1, a2
; RV32IM-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s7, 512
-; RV32IM-NEXT: and a1, s5, s7
-; RV32IM-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s4, 1024
-; RV32IM-NEXT: and a1, s5, s4
-; RV32IM-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s3, 2048
-; RV32IM-NEXT: and a1, s5, s3
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 512
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t2, 512
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 9
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t3, 9
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mv a7, t5
+; RV32IM-NEXT: andi a1, t5, 1
+; RV32IM-NEXT: andi a0, t5, 1024
+; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s9, 4
+; RV32IM-NEXT: and a4, t5, s9
+; RV32IM-NEXT: lui s10, 8
+; RV32IM-NEXT: and t0, t5, s10
+; RV32IM-NEXT: lui t1, 16
+; RV32IM-NEXT: and t6, t5, t1
+; RV32IM-NEXT: lui s8, 32
+; RV32IM-NEXT: and s1, t5, s8
+; RV32IM-NEXT: lui s5, 512
+; RV32IM-NEXT: and s3, t5, s5
+; RV32IM-NEXT: lui s6, 1024
+; RV32IM-NEXT: and s11, t5, s6
+; RV32IM-NEXT: lui s7, 2048
+; RV32IM-NEXT: and a0, t5, s7
+; RV32IM-NEXT: lui t4, 4096
+; RV32IM-NEXT: and a2, t5, t4
+; RV32IM-NEXT: lui t5, 8192
+; RV32IM-NEXT: and a3, a7, t5
+; RV32IM-NEXT: lui s2, 131072
+; RV32IM-NEXT: and a5, a7, s2
+; RV32IM-NEXT: lui s4, 262144
+; RV32IM-NEXT: and a6, a7, s4
+; RV32IM-NEXT: lui s0, 524288
+; RV32IM-NEXT: and a7, a7, s0
+; RV32IM-NEXT: seqz a1, a1
; RV32IM-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a6, 4096
-; RV32IM-NEXT: and a1, s5, a6
-; RV32IM-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 8192
-; RV32IM-NEXT: and a2, s5, a1
-; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a7, 16384
-; RV32IM-NEXT: and a2, s5, a7
-; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a4, 32768
-; RV32IM-NEXT: and a2, s5, a4
-; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 65536
-; RV32IM-NEXT: and a2, s5, a2
-; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s8, 131072
-; RV32IM-NEXT: and a2, s5, s8
-; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 262144
-; RV32IM-NEXT: and a2, s5, a2
-; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 524288
-; RV32IM-NEXT: and a2, s5, a2
-; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s11, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: andi a2, s11, 2
-; RV32IM-NEXT: sw a2, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 1
-; RV32IM-NEXT: sw a2, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 4
-; RV32IM-NEXT: sw a2, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 8
-; RV32IM-NEXT: sw a2, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 16
-; RV32IM-NEXT: sw a2, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 32
-; RV32IM-NEXT: sw a2, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 64
-; RV32IM-NEXT: sw a2, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s2, s11, 128
-; RV32IM-NEXT: andi a5, s11, 256
-; RV32IM-NEXT: sw a5, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi ra, s11, 512
-; RV32IM-NEXT: andi a2, s11, 1024
-; RV32IM-NEXT: and s6, s11, a0
-; RV32IM-NEXT: and a3, s11, a3
-; RV32IM-NEXT: sw a3, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, s11, t1
-; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t2, s11, t0
-; RV32IM-NEXT: and s0, s11, t4
-; RV32IM-NEXT: and t0, s11, s9
-; RV32IM-NEXT: and t4, s11, t3
-; RV32IM-NEXT: and t5, s11, t5
-; RV32IM-NEXT: and t6, s11, t6
-; RV32IM-NEXT: and s1, s11, s1
-; RV32IM-NEXT: and t3, s11, s7
-; RV32IM-NEXT: and a0, s11, s4
-; RV32IM-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s4, s11, s3
-; RV32IM-NEXT: and s3, s11, a6
-; RV32IM-NEXT: and s9, s11, a1
-; RV32IM-NEXT: and a7, s11, a7
-; RV32IM-NEXT: and s7, s11, a4
-; RV32IM-NEXT: lui a1, 65536
-; RV32IM-NEXT: and a6, s11, a1
-; RV32IM-NEXT: and t1, s11, s8
-; RV32IM-NEXT: lui a1, 262144
-; RV32IM-NEXT: and s5, s11, a1
-; RV32IM-NEXT: lui a1, 524288
-; RV32IM-NEXT: and s8, s11, a1
-; RV32IM-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s11, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, s11
-; RV32IM-NEXT: sw a4, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 216(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 212(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 208(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s2
-; RV32IM-NEXT: sw a4, 204(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a5
-; RV32IM-NEXT: sw a4, 200(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, ra
-; RV32IM-NEXT: sw a4, 288(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a2
-; RV32IM-NEXT: sw a4, 168(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s6
-; RV32IM-NEXT: sw a4, 196(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a3
-; RV32IM-NEXT: sw a4, 188(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a3
-; RV32IM-NEXT: sw a4, 284(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t2
-; RV32IM-NEXT: sw a4, 164(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s0
-; RV32IM-NEXT: sw a4, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t0
-; RV32IM-NEXT: sw a4, 180(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t4
-; RV32IM-NEXT: sw a4, 176(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t5
-; RV32IM-NEXT: sw a4, 276(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t6
-; RV32IM-NEXT: sw a4, 152(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s1
-; RV32IM-NEXT: sw a4, 320(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t3
-; RV32IM-NEXT: sw a4, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a0
-; RV32IM-NEXT: sw a4, 160(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s4
-; RV32IM-NEXT: sw a4, 156(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s3
-; RV32IM-NEXT: sw a4, 272(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s9
-; RV32IM-NEXT: sw a4, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a7
-; RV32IM-NEXT: sw a4, 316(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s7
-; RV32IM-NEXT: sw a4, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a6
-; RV32IM-NEXT: sw a4, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t1
-; RV32IM-NEXT: sw a4, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mv a4, t1
-; RV32IM-NEXT: mul a0, a1, s5
-; RV32IM-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a1, s8
-; RV32IM-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 304(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 300(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
+; RV32IM-NEXT: andi a1, t2, 1
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mv a1, t2
+; RV32IM-NEXT: and t2, t2, s9
+; RV32IM-NEXT: mul t2, t3, t2
+; RV32IM-NEXT: sw t2, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, s10
+; RV32IM-NEXT: mul t2, t3, t2
+; RV32IM-NEXT: sw t2, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, t1
+; RV32IM-NEXT: mul t1, t3, t2
+; RV32IM-NEXT: sw t1, 192(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, s8
+; RV32IM-NEXT: mul t1, t3, t2
+; RV32IM-NEXT: sw t1, 236(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, s5
+; RV32IM-NEXT: mul t1, t3, t2
+; RV32IM-NEXT: sw t1, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, s6
+; RV32IM-NEXT: mul t1, t3, t2
+; RV32IM-NEXT: sw t1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, s7
+; RV32IM-NEXT: mul t1, t3, t2
+; RV32IM-NEXT: sw t1, 164(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, t4
+; RV32IM-NEXT: mul t1, t3, t2
+; RV32IM-NEXT: sw t1, 220(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, t5
+; RV32IM-NEXT: mul t1, t3, t2
+; RV32IM-NEXT: sw t1, 244(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, s2
+; RV32IM-NEXT: mul t1, t3, t2
+; RV32IM-NEXT: sw t1, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, s4
+; RV32IM-NEXT: mul t1, t3, t2
+; RV32IM-NEXT: sw t1, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t2, a1, s0
+; RV32IM-NEXT: mv t5, a1
+; RV32IM-NEXT: mul a1, t3, t2
+; RV32IM-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 440(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a1, t3, a1
+; RV32IM-NEXT: mul s9, ra, a4
+; RV32IM-NEXT: mul a4, t3, a4
+; RV32IM-NEXT: sw a4, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, ra, t0
+; RV32IM-NEXT: sw a4, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, t3, t0
+; RV32IM-NEXT: sw a4, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, ra, t6
+; RV32IM-NEXT: sw a4, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, t3, t6
+; RV32IM-NEXT: sw a4, 144(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, ra, s1
+; RV32IM-NEXT: sw a4, 172(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, t3, s1
+; RV32IM-NEXT: sw a4, 440(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a4, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a4, t3, a4
+; RV32IM-NEXT: mul s8, ra, s3
+; RV32IM-NEXT: mul t0, t3, s3
+; RV32IM-NEXT: sw t0, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t0, ra, s11
+; RV32IM-NEXT: sw t0, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t0, t3, s11
+; RV32IM-NEXT: sw t0, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t0, ra, a0
+; RV32IM-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t3, a0
; RV32IM-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 172(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 296(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
+; RV32IM-NEXT: mul a0, ra, a2
+; RV32IM-NEXT: sw a0, 152(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t3, a2
+; RV32IM-NEXT: sw a0, 168(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, ra, a3
+; RV32IM-NEXT: sw a0, 188(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t3, a3
+; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul s11, t3, a0
+; RV32IM-NEXT: mul s6, ra, a5
+; RV32IM-NEXT: mul a0, t3, a5
+; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, ra, a6
+; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t3, a6
+; RV32IM-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, ra, a7
; RV32IM-NEXT: sw a0, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 292(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 280(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s11, s10, s11
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s2, s10, s2
-; RV32IM-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, s10, ra
-; RV32IM-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, s10, a2
-; RV32IM-NEXT: sw a1, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul ra, s10, s6
-; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, s10, a3
-; RV32IM-NEXT: mul a0, s10, t2
-; RV32IM-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, s0
-; RV32IM-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t2, s10, t0
-; RV32IM-NEXT: mul t4, s10, t4
-; RV32IM-NEXT: mul t1, s10, t5
-; RV32IM-NEXT: mul a0, s10, t6
-; RV32IM-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, s1
-; RV32IM-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, t3
-; RV32IM-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t3, s10, a0
-; RV32IM-NEXT: mul t5, s10, s4
-; RV32IM-NEXT: mul t0, s10, s3
-; RV32IM-NEXT: mul a0, s10, s9
-; RV32IM-NEXT: sw a0, 192(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, a7
-; RV32IM-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, s7
-; RV32IM-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, a6
-; RV32IM-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, a4
-; RV32IM-NEXT: mul s5, s10, s5
-; RV32IM-NEXT: mul a4, s10, s8
-; RV32IM-NEXT: lw a1, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and s3, s10, a1
-; RV32IM-NEXT: slli s10, s10, 24
-; RV32IM-NEXT: slli s3, s3, 8
-; RV32IM-NEXT: or a5, s10, s3
-; RV32IM-NEXT: lw s9, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s6, s9, 8
-; RV32IM-NEXT: and s6, s6, a1
-; RV32IM-NEXT: srli s7, s9, 24
-; RV32IM-NEXT: or a6, s6, s7
-; RV32IM-NEXT: and s7, s9, a1
-; RV32IM-NEXT: slli s9, s9, 24
-; RV32IM-NEXT: slli s7, s7, 8
-; RV32IM-NEXT: or a7, s9, s7
-; RV32IM-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 224(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, a2, a1
-; RV32IM-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, a1, a2
-; RV32IM-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 208(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, a1, a2
-; RV32IM-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 200(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s4, a1, a2
-; RV32IM-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 188(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, a1, a2
-; RV32IM-NEXT: lw a1, 180(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 176(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, a1, a2
-; RV32IM-NEXT: lw a1, 160(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 156(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, a1, a2
-; RV32IM-NEXT: lw a1, 148(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s9, a2, a1
-; RV32IM-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, a2, a1
-; RV32IM-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a0, t3, a7
+; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t4, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi t4, t4, -1
+; RV32IM-NEXT: lw t1, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli a7, ra, 10
+; RV32IM-NEXT: and a0, t4, ra
+; RV32IM-NEXT: and t1, t1, t3
+; RV32IM-NEXT: and a3, t4, t3
+; RV32IM-NEXT: slli t3, t3, 10
+; RV32IM-NEXT: andi t4, t5, 1024
+; RV32IM-NEXT: lw a2, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: seqz s0, a2
+; RV32IM-NEXT: seqz t4, t4
+; RV32IM-NEXT: addi s0, s0, -1
+; RV32IM-NEXT: addi ra, t4, -1
+; RV32IM-NEXT: and a2, s0, a7
+; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, ra, t3
+; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, s0, t3
+; RV32IM-NEXT: sw a2, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: sw a1, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a4
+; RV32IM-NEXT: sw a1, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, s11
+; RV32IM-NEXT: sw a1, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 232(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: sw a1, 512(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 448(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 396(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 340(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 420(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 264(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 396(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 256(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 204(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, a2, a1
-; RV32IM-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 392(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 252(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 384(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 520(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 240(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: sw a1, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 468(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 408(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 376(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 504(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 348(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 368(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 292(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 368(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 284(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 352(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 272(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: sw a1, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, s11, a1
-; RV32IM-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a1, a2
-; RV32IM-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, s2, a1
-; RV32IM-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor ra, ra, a1
-; RV32IM-NEXT: xor t2, t2, t4
-; RV32IM-NEXT: xor t3, t3, t5
-; RV32IM-NEXT: xor t4, a0, s5
-; RV32IM-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 356(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 340(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, a0, a1
-; RV32IM-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s5, a1, a0
-; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a1, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 200(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 160(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s10, a0, a1
+; RV32IM-NEXT: lw a0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, a0, a1
+; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor s9, a0, s9
; RV32IM-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor s8, a0, s8
; RV32IM-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 336(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor s6, a0, s6
+; RV32IM-NEXT: lw a0, 224(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, t1, a0
+; RV32IM-NEXT: lw a0, 204(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 176(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, a0, a1
+; RV32IM-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, a0, s0
+; RV32IM-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, a0, s1
+; RV32IM-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s2, a0, a1
; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 328(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 324(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 312(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or a0, a5, a0
-; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: or a0, a7, a6
-; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a5, t6, s1
-; RV32IM-NEXT: lw a0, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, s3, a0
+; RV32IM-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, a0, a1
+; RV32IM-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, a0, a1
+; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, a1, a0
+; RV32IM-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a0, a1
+; RV32IM-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, a0, a1
+; RV32IM-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, a0, a1
; RV32IM-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, s4, a0
-; RV32IM-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, s6, a0
+; RV32IM-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a1, a0
; RV32IM-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, s7, a0
-; RV32IM-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s8, a0
-; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a1, a0
; RV32IM-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s9, a0
-; RV32IM-NEXT: sw a0, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s0, a0
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, a1, a0
+; RV32IM-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a3, a0
+; RV32IM-NEXT: lw a0, 228(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 156(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: lw a2, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a3
+; RV32IM-NEXT: lw a3, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a4, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a4, a3
+; RV32IM-NEXT: lw a4, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a5, a4
+; RV32IM-NEXT: lw a5, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw ra, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, ra, a5
+; RV32IM-NEXT: lw ra, 512(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, ra, s5
+; RV32IM-NEXT: sw ra, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 392(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 300(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 396(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 296(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 420(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 280(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 408(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 520(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, ra, s5
+; RV32IM-NEXT: sw ra, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 504(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 404(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 400(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 368(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 396(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 328(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 392(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 312(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: lw s5, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s10, s5, s10
+; RV32IM-NEXT: sw s10, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s10, 180(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s10, s11, s10
+; RV32IM-NEXT: sw s10, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s10, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s5, s10
+; RV32IM-NEXT: sw s5, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s5, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s9, s5
+; RV32IM-NEXT: sw s5, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s5, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s8, s5
+; RV32IM-NEXT: sw s5, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s5, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s6, s5
+; RV32IM-NEXT: sw s5, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor t6, s7, t6
+; RV32IM-NEXT: sw t6, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 196(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s0, t6
+; RV32IM-NEXT: sw t6, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s1, t6
+; RV32IM-NEXT: sw t6, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s2, t6
+; RV32IM-NEXT: sw t6, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s3, t6
+; RV32IM-NEXT: sw t6, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s4, t6
+; RV32IM-NEXT: sw t6, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a7, t5, a7
+; RV32IM-NEXT: sw a7, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t0, a7
+; RV32IM-NEXT: sw a7, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t1, a7
+; RV32IM-NEXT: sw a7, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 324(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t2, a7
+; RV32IM-NEXT: sw a7, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 320(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t3, a7
+; RV32IM-NEXT: sw a7, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 304(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t4, a7
+; RV32IM-NEXT: sw a7, 512(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a6, a0
+; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 216(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, a1, a0
+; RV32IM-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a0
+; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a3, a0
; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a4, a0
; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a5, a0
; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s10, a0
-; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, s11
-; RV32IM-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, a2, a0
-; RV32IM-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, s2, a0
-; RV32IM-NEXT: xor ra, ra, a3
-; RV32IM-NEXT: xor a0, t2, t1
-; RV32IM-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t3, t0
-; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t4, a4
-; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t5, s5
-; RV32IM-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t1, 61681
+; RV32IM-NEXT: addi t1, t1, -241
+; RV32IM-NEXT: sw t1, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a3, a0, 4
+; RV32IM-NEXT: and a4, a0, t1
+; RV32IM-NEXT: and a3, a3, t1
+; RV32IM-NEXT: slli a4, a4, 4
+; RV32IM-NEXT: or a3, a3, a4
+; RV32IM-NEXT: sw a3, 600(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, a1, a0
-; RV32IM-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a4, a0, 4
+; RV32IM-NEXT: and a7, a0, t1
+; RV32IM-NEXT: and a4, a4, t1
+; RV32IM-NEXT: slli a7, a7, 4
+; RV32IM-NEXT: or a0, a4, a7
+; RV32IM-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor s9, a0, a1
-; RV32IM-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, a1, a0
+; RV32IM-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 448(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor s5, a1, a0
-; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s8, a1, a0
+; RV32IM-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, a0, a1
+; RV32IM-NEXT: lw a0, 316(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s2, a1, a0
+; RV32IM-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 396(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s0, a0
+; RV32IM-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 392(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor s1, s1, a0
-; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 388(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, ra, a0
+; RV32IM-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a7, a0, 4
+; RV32IM-NEXT: and t0, a0, t1
+; RV32IM-NEXT: and a7, a7, t1
+; RV32IM-NEXT: slli t0, t0, 4
+; RV32IM-NEXT: or s6, a7, t0
+; RV32IM-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t0, a0, 4
+; RV32IM-NEXT: and ra, a0, t1
+; RV32IM-NEXT: and t0, t0, t1
+; RV32IM-NEXT: slli ra, ra, 4
+; RV32IM-NEXT: or s7, t0, ra
+; RV32IM-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, a0, a1
+; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, a1, a0
+; RV32IM-NEXT: lw a0, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, a1, a0
+; RV32IM-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a1, a0
+; RV32IM-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a0, a1
+; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, a1, a0
-; RV32IM-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, a1, a0
+; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, a1, a0
-; RV32IM-NEXT: xor s4, a5, a6
-; RV32IM-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, a7, a0
+; RV32IM-NEXT: xor a7, a1, a0
; RV32IM-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t6, a0
-; RV32IM-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, s6, a0
-; RV32IM-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, a1, a0
-; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, a0, a1
-; RV32IM-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, a1, a0
-; RV32IM-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a1, a0
-; RV32IM-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a1, a0
-; RV32IM-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor t0, a1, a0
-; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, a0, a1
-; RV32IM-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a0, a1
+; RV32IM-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a0
+; RV32IM-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a4, 520(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a4, a4, a0
-; RV32IM-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a5, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 516(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a5, a5, a0
-; RV32IM-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a0
-; RV32IM-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a3, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a0
-; RV32IM-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, a0, s10
-; RV32IM-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, s11, a0
-; RV32IM-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor ra, ra, a0
-; RV32IM-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, s6, a1
-; RV32IM-NEXT: lw s6, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, s6, s7
-; RV32IM-NEXT: sw s6, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s6, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, s7, s6
-; RV32IM-NEXT: sw s6, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s6, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, s7, s6
-; RV32IM-NEXT: lw s6, 640(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, s8, s6
-; RV32IM-NEXT: xor s5, s9, s5
-; RV32IM-NEXT: lw s6, 628(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s1, s6
-; RV32IM-NEXT: lw s6, 620(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, s2, s6
-; RV32IM-NEXT: lw s6, 612(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, s3, s6
-; RV32IM-NEXT: xor s0, s4, s0
-; RV32IM-NEXT: sw s0, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s0, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t3, s0
-; RV32IM-NEXT: lw s0, 320(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t4, s0
-; RV32IM-NEXT: lw s0, 316(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, t5, s0
-; RV32IM-NEXT: xor s0, t6, t2
-; RV32IM-NEXT: lw t2, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, t2
-; RV32IM-NEXT: lw t2, 304(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, a0, s11
+; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a2, a0
+; RV32IM-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 504(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: lw a2, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw ra, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, ra, a2
+; RV32IM-NEXT: lw ra, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s10, ra
+; RV32IM-NEXT: xor s4, s9, s4
+; RV32IM-NEXT: lw s9, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s5, s9
+; RV32IM-NEXT: lw s9, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s8, s8, s9
+; RV32IM-NEXT: xor s2, s3, s2
+; RV32IM-NEXT: lw s3, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s0, s3
+; RV32IM-NEXT: lw s3, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s1, s3
+; RV32IM-NEXT: xor t4, t6, t4
+; RV32IM-NEXT: lw t6, 172(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, t5, t6
+; RV32IM-NEXT: lw t6, 152(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, t3, t6
+; RV32IM-NEXT: xor t1, t2, t1
+; RV32IM-NEXT: lw t2, 236(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a7, a7, t2
-; RV32IM-NEXT: lw t2, 300(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw t2, 220(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor t0, t0, t2
-; RV32IM-NEXT: xor a4, t1, a4
-; RV32IM-NEXT: lw t1, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, t1
-; RV32IM-NEXT: lw t1, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t1
-; RV32IM-NEXT: lw t1, 464(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t1
-; RV32IM-NEXT: xor t2, s10, s11
-; RV32IM-NEXT: lw t1, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, ra, t1
-; RV32IM-NEXT: lw t1, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, t1
-; RV32IM-NEXT: lw t1, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, a1, t1
-; RV32IM-NEXT: lui a1, 61681
-; RV32IM-NEXT: addi s4, a1, -241
-; RV32IM-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s9, a1, 4
-; RV32IM-NEXT: and s10, a1, s4
-; RV32IM-NEXT: and s9, s9, s4
-; RV32IM-NEXT: slli s10, s10, 4
-; RV32IM-NEXT: or s9, s9, s10
-; RV32IM-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s10, a1, 4
-; RV32IM-NEXT: and s11, a1, s4
-; RV32IM-NEXT: and s10, s10, s4
-; RV32IM-NEXT: slli s11, s11, 4
-; RV32IM-NEXT: or s10, s10, s11
-; RV32IM-NEXT: lw a1, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, a1, s6
-; RV32IM-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, s7, a1
-; RV32IM-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, s8, a1
-; RV32IM-NEXT: xor s1, s5, s1
-; RV32IM-NEXT: lw a1, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, s2, a1
-; RV32IM-NEXT: lw a1, 644(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, s3, a1
-; RV32IM-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s5, a1, 4
-; RV32IM-NEXT: and s11, a1, s4
-; RV32IM-NEXT: and s5, s5, s4
-; RV32IM-NEXT: slli s11, s11, 4
-; RV32IM-NEXT: or s5, s5, s11
-; RV32IM-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s11, a1, 4
-; RV32IM-NEXT: and ra, a1, s4
-; RV32IM-NEXT: and s11, s11, s4
-; RV32IM-NEXT: slli ra, ra, 4
-; RV32IM-NEXT: or s11, s11, ra
-; RV32IM-NEXT: lw a1, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, a1, t3
-; RV32IM-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t4, a1
-; RV32IM-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, t5, a1
-; RV32IM-NEXT: xor a6, s0, a6
-; RV32IM-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, a1
-; RV32IM-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, a1
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: lw a1, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a1
-; RV32IM-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a1
-; RV32IM-NEXT: xor a5, t2, t6
-; RV32IM-NEXT: lw a1, 716(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a6, a3
+; RV32IM-NEXT: lw a6, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a6
+; RV32IM-NEXT: lw a6, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: xor a0, s11, a0
+; RV32IM-NEXT: lw a6, 440(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a6
+; RV32IM-NEXT: lw a6, 168(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a6
+; RV32IM-NEXT: xor a6, s4, s5
+; RV32IM-NEXT: lw t2, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, s8, t2
+; RV32IM-NEXT: xor t6, s2, s0
+; RV32IM-NEXT: lw s0, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s1, s0
+; RV32IM-NEXT: xor t4, t4, t5
+; RV32IM-NEXT: lw t5, 188(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, t3, t5
+; RV32IM-NEXT: xor a7, t1, a7
+; RV32IM-NEXT: lw t1, 244(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, t1
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: lw a4, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a5, a4
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: lw a1, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, a1
-; RV32IM-NEXT: xor t2, s6, s7
-; RV32IM-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, s8, a1
-; RV32IM-NEXT: xor s0, s1, s2
-; RV32IM-NEXT: lw a1, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s3, a1
-; RV32IM-NEXT: xor t3, t3, t4
-; RV32IM-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t5, a1
-; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, t0, a1
-; RV32IM-NEXT: xor a2, a4, a2
-; RV32IM-NEXT: lw a1, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a1
-; RV32IM-NEXT: xor a0, a5, a0
-; RV32IM-NEXT: lw a1, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, t1, a1
-; RV32IM-NEXT: xor a5, t2, t6
-; RV32IM-NEXT: xor s0, s0, s1
-; RV32IM-NEXT: xor t0, t3, t4
-; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: xor a2, a2, a3
-; RV32IM-NEXT: xor a0, a0, a4
-; RV32IM-NEXT: lui a4, 209715
-; RV32IM-NEXT: addi t5, a4, 819
-; RV32IM-NEXT: srli a3, s9, 2
-; RV32IM-NEXT: and a7, s9, t5
-; RV32IM-NEXT: and a3, a3, t5
-; RV32IM-NEXT: slli a7, a7, 2
-; RV32IM-NEXT: or a3, a3, a7
-; RV32IM-NEXT: srli a7, s10, 2
-; RV32IM-NEXT: and t1, s10, t5
-; RV32IM-NEXT: and a7, a7, t5
-; RV32IM-NEXT: slli t1, t1, 2
-; RV32IM-NEXT: or t1, a7, t1
-; RV32IM-NEXT: lw a1, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a1
-; RV32IM-NEXT: lw a1, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, s0, a1
-; RV32IM-NEXT: srli t2, s5, 2
-; RV32IM-NEXT: and t3, s5, t5
-; RV32IM-NEXT: and t2, t2, t5
-; RV32IM-NEXT: slli t3, t3, 2
-; RV32IM-NEXT: or t2, t2, t3
-; RV32IM-NEXT: srli t3, s11, 2
-; RV32IM-NEXT: and t4, s11, t5
-; RV32IM-NEXT: and t3, t3, t5
-; RV32IM-NEXT: slli t4, t4, 2
-; RV32IM-NEXT: or t3, t3, t4
-; RV32IM-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, a1
-; RV32IM-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, a1
-; RV32IM-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: sw a1, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lui a2, 209715
+; RV32IM-NEXT: addi s4, a2, 819
+; RV32IM-NEXT: lw a2, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a5, a2, 2
+; RV32IM-NEXT: and t1, a2, s4
+; RV32IM-NEXT: and a5, a5, s4
+; RV32IM-NEXT: slli t1, t1, 2
+; RV32IM-NEXT: or a5, a5, t1
+; RV32IM-NEXT: lw a2, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t1, a2, 2
+; RV32IM-NEXT: and t5, a2, s4
+; RV32IM-NEXT: and t1, t1, s4
+; RV32IM-NEXT: slli t5, t5, 2
+; RV32IM-NEXT: or t1, t1, t5
+; RV32IM-NEXT: xor a6, a6, t2
+; RV32IM-NEXT: xor t2, t6, s0
+; RV32IM-NEXT: srli t5, s6, 2
+; RV32IM-NEXT: and t6, s6, s4
+; RV32IM-NEXT: and t5, t5, s4
+; RV32IM-NEXT: slli t6, t6, 2
+; RV32IM-NEXT: or t5, t5, t6
+; RV32IM-NEXT: srli t6, s7, 2
+; RV32IM-NEXT: and s0, s7, s4
+; RV32IM-NEXT: and t6, t6, s4
+; RV32IM-NEXT: slli s0, s0, 2
+; RV32IM-NEXT: or t6, t6, s0
+; RV32IM-NEXT: xor t3, t4, t3
+; RV32IM-NEXT: xor a7, a7, t0
+; RV32IM-NEXT: xor a3, a3, a4
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, a7, a5
-; RV32IM-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, a6, t0
-; RV32IM-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t4, 349525
-; RV32IM-NEXT: addi t4, t4, 1365
-; RV32IM-NEXT: srli a0, a3, 1
-; RV32IM-NEXT: and a2, a3, t4
-; RV32IM-NEXT: and a0, a0, t4
-; RV32IM-NEXT: slli a2, a2, 1
-; RV32IM-NEXT: or a0, a0, a2
-; RV32IM-NEXT: srli a2, t1, 1
-; RV32IM-NEXT: and a3, t1, t4
-; RV32IM-NEXT: and a2, a2, t4
+; RV32IM-NEXT: lw a1, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a6, a1
+; RV32IM-NEXT: lw a2, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, t2, a2
+; RV32IM-NEXT: lw a2, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, t3, a2
+; RV32IM-NEXT: lw a2, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, a2
+; RV32IM-NEXT: lw a2, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a3, a2
+; RV32IM-NEXT: sw a2, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a0, ra
+; RV32IM-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a1, a4, a1
+; RV32IM-NEXT: sw a1, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a7, a6
+; RV32IM-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s6, 349525
+; RV32IM-NEXT: addi s6, s6, 1365
+; RV32IM-NEXT: srli a0, a5, 1
+; RV32IM-NEXT: and a1, a5, s6
+; RV32IM-NEXT: and a0, a0, s6
+; RV32IM-NEXT: slli a1, a1, 1
+; RV32IM-NEXT: or a1, a0, a1
+; RV32IM-NEXT: srli a0, t1, 1
+; RV32IM-NEXT: and a3, t1, s6
+; RV32IM-NEXT: and a0, a0, s6
; RV32IM-NEXT: slli a3, a3, 1
-; RV32IM-NEXT: or a3, a2, a3
-; RV32IM-NEXT: srli a1, t2, 1
-; RV32IM-NEXT: and a5, t2, t4
-; RV32IM-NEXT: and a1, a1, t4
-; RV32IM-NEXT: slli a5, a5, 1
-; RV32IM-NEXT: or t0, a1, a5
-; RV32IM-NEXT: srli a1, t3, 1
-; RV32IM-NEXT: and a5, t3, t4
-; RV32IM-NEXT: and a1, a1, t4
-; RV32IM-NEXT: slli a5, a5, 1
-; RV32IM-NEXT: or s11, a1, a5
-; RV32IM-NEXT: andi a1, a3, 2
-; RV32IM-NEXT: sw a1, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a3, 1
-; RV32IM-NEXT: andi a6, a3, 4
-; RV32IM-NEXT: andi t1, a3, 8
-; RV32IM-NEXT: andi t2, a3, 16
-; RV32IM-NEXT: andi t3, a3, 32
-; RV32IM-NEXT: andi a7, a3, 64
-; RV32IM-NEXT: andi a1, a3, 128
-; RV32IM-NEXT: sw a1, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t6, a3, 256
-; RV32IM-NEXT: andi s0, a3, 512
-; RV32IM-NEXT: andi s2, a3, 1024
-; RV32IM-NEXT: lw a4, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and s8, a3, a4
-; RV32IM-NEXT: lui a1, 1
-; RV32IM-NEXT: and s9, a3, a1
-; RV32IM-NEXT: lui s1, 2
-; RV32IM-NEXT: and s1, a3, s1
-; RV32IM-NEXT: lui a1, 4
-; RV32IM-NEXT: and s3, a3, a1
-; RV32IM-NEXT: lui a1, 8
-; RV32IM-NEXT: and a1, a3, a1
-; RV32IM-NEXT: sw a1, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s5, 16
-; RV32IM-NEXT: and s5, a3, s5
-; RV32IM-NEXT: lui s6, 32
-; RV32IM-NEXT: and s6, a3, s6
-; RV32IM-NEXT: lui s7, 64
-; RV32IM-NEXT: and s7, a3, s7
-; RV32IM-NEXT: lui s10, 128
-; RV32IM-NEXT: and s10, a3, s10
-; RV32IM-NEXT: lui ra, 256
-; RV32IM-NEXT: and ra, a3, ra
-; RV32IM-NEXT: lui a1, 512
-; RV32IM-NEXT: and a1, a3, a1
-; RV32IM-NEXT: sw a1, 688(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 1024
-; RV32IM-NEXT: and a1, a3, a1
-; RV32IM-NEXT: sw a1, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 2048
-; RV32IM-NEXT: and a1, a3, a1
-; RV32IM-NEXT: sw a1, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 4096
-; RV32IM-NEXT: and a1, a3, a1
-; RV32IM-NEXT: sw a1, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 8192
-; RV32IM-NEXT: and a1, a3, a1
-; RV32IM-NEXT: sw a1, 640(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 16384
-; RV32IM-NEXT: and a1, a3, a1
-; RV32IM-NEXT: sw a1, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 32768
-; RV32IM-NEXT: and a1, a3, a1
-; RV32IM-NEXT: sw a1, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 65536
-; RV32IM-NEXT: and a1, a3, a1
-; RV32IM-NEXT: sw a1, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 131072
-; RV32IM-NEXT: and a1, a3, a1
-; RV32IM-NEXT: sw a1, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 262144
-; RV32IM-NEXT: and a2, a3, a1
+; RV32IM-NEXT: or a0, a0, a3
+; RV32IM-NEXT: srli a3, t5, 1
+; RV32IM-NEXT: and a4, t5, s6
+; RV32IM-NEXT: and a3, a3, s6
+; RV32IM-NEXT: slli a4, a4, 1
+; RV32IM-NEXT: or a5, a3, a4
+; RV32IM-NEXT: srli a3, t6, 1
+; RV32IM-NEXT: and a4, t6, s6
+; RV32IM-NEXT: and a3, a3, s6
+; RV32IM-NEXT: slli a4, a4, 1
+; RV32IM-NEXT: or a6, a3, a4
+; RV32IM-NEXT: lw t2, 620(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a3, a0, t2
+; RV32IM-NEXT: mul a3, a1, a3
+; RV32IM-NEXT: lui a2, 1
+; RV32IM-NEXT: and a4, a0, a2
+; RV32IM-NEXT: mul a4, a1, a4
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: lui t0, 64
+; RV32IM-NEXT: and a4, a0, t0
+; RV32IM-NEXT: mul a4, a1, a4
+; RV32IM-NEXT: lui t3, 128
+; RV32IM-NEXT: and a7, a0, t3
+; RV32IM-NEXT: mul a7, a1, a7
+; RV32IM-NEXT: xor a7, a4, a7
+; RV32IM-NEXT: lui t5, 4096
+; RV32IM-NEXT: and a4, a0, t5
+; RV32IM-NEXT: mul a4, a1, a4
+; RV32IM-NEXT: lui t4, 8192
+; RV32IM-NEXT: and t1, a0, t4
+; RV32IM-NEXT: mul t1, a1, t1
+; RV32IM-NEXT: xor a4, a4, t1
+; RV32IM-NEXT: and t1, a6, t2
+; RV32IM-NEXT: and t2, a6, a2
+; RV32IM-NEXT: mul t1, a5, t1
+; RV32IM-NEXT: mul t2, a5, t2
+; RV32IM-NEXT: xor a2, t1, t2
+; RV32IM-NEXT: sw a2, 620(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t1, a6, t0
+; RV32IM-NEXT: and t2, a6, t3
+; RV32IM-NEXT: mul t1, a5, t1
+; RV32IM-NEXT: mul t2, a5, t2
+; RV32IM-NEXT: xor a2, t1, t2
+; RV32IM-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t1, a6, t5
+; RV32IM-NEXT: and t2, a6, t4
+; RV32IM-NEXT: mul t1, a5, t1
+; RV32IM-NEXT: mul t2, a5, t2
+; RV32IM-NEXT: xor a2, t1, t2
+; RV32IM-NEXT: sw a2, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 2
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 1
+; RV32IM-NEXT: and a2, t1, t2
+; RV32IM-NEXT: sw a2, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 4
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 2
+; RV32IM-NEXT: and a2, t1, t2
+; RV32IM-NEXT: sw a2, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 8
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 3
+; RV32IM-NEXT: and a2, t1, t2
+; RV32IM-NEXT: sw a2, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 16
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 4
+; RV32IM-NEXT: and a2, t1, t2
+; RV32IM-NEXT: sw a2, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 32
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 5
+; RV32IM-NEXT: and a2, t1, t2
+; RV32IM-NEXT: sw a2, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 64
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 6
+; RV32IM-NEXT: and a2, t1, t2
+; RV32IM-NEXT: sw a2, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 128
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 7
+; RV32IM-NEXT: and a2, t1, t2
; RV32IM-NEXT: sw a2, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 524288
-; RV32IM-NEXT: and a2, a3, a2
-; RV32IM-NEXT: lw a3, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, a5
-; RV32IM-NEXT: sw a3, 628(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, a6
-; RV32IM-NEXT: sw a3, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, t1
-; RV32IM-NEXT: sw a3, 620(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, t2
-; RV32IM-NEXT: sw a3, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, t3
-; RV32IM-NEXT: sw a3, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, a7
-; RV32IM-NEXT: sw a3, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, t6
-; RV32IM-NEXT: sw a3, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s0
-; RV32IM-NEXT: sw a3, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s2
-; RV32IM-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s8
-; RV32IM-NEXT: sw a3, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s9
-; RV32IM-NEXT: sw a3, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s1
-; RV32IM-NEXT: sw a3, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s3
-; RV32IM-NEXT: sw a3, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s5
-; RV32IM-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s6
-; RV32IM-NEXT: sw a3, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s7
-; RV32IM-NEXT: sw a3, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s10
-; RV32IM-NEXT: sw a3, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, ra
-; RV32IM-NEXT: sw a3, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 640(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 640(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: sw a3, 688(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a0, a2
-; RV32IM-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t2, s11, a4
-; RV32IM-NEXT: lui a0, 1
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 2
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 4
-; RV32IM-NEXT: and s1, s11, a0
-; RV32IM-NEXT: lui a0, 8
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 16
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 32
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 64
-; RV32IM-NEXT: and s6, s11, a0
-; RV32IM-NEXT: lui a0, 128
-; RV32IM-NEXT: and s7, s11, a0
-; RV32IM-NEXT: lui a0, 256
-; RV32IM-NEXT: and s10, s11, a0
-; RV32IM-NEXT: lui a0, 512
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 1024
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 2048
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 4096
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 8192
-; RV32IM-NEXT: and a0, s11, a0
+; RV32IM-NEXT: andi t1, a0, 256
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 8
+; RV32IM-NEXT: and a2, t1, t2
+; RV32IM-NEXT: sw a2, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 512
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 9
+; RV32IM-NEXT: and a2, t1, t2
+; RV32IM-NEXT: sw a2, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 1
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: lui a2, 2
+; RV32IM-NEXT: and t2, a0, a2
+; RV32IM-NEXT: mul a2, a1, t2
+; RV32IM-NEXT: lui t0, 4
+; RV32IM-NEXT: and t2, a0, t0
+; RV32IM-NEXT: mul t0, a1, t2
+; RV32IM-NEXT: sw t0, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s3, 8
+; RV32IM-NEXT: and t2, a0, s3
+; RV32IM-NEXT: mul t0, a1, t2
+; RV32IM-NEXT: sw t0, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s5, 16
+; RV32IM-NEXT: and t2, a0, s5
+; RV32IM-NEXT: mul t0, a1, t2
+; RV32IM-NEXT: sw t0, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s8, 32
+; RV32IM-NEXT: and t2, a0, s8
+; RV32IM-NEXT: mul t0, a1, t2
+; RV32IM-NEXT: sw t0, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t0, 256
+; RV32IM-NEXT: and t2, a0, t0
+; RV32IM-NEXT: mul t2, a1, t2
+; RV32IM-NEXT: lui s7, 512
+; RV32IM-NEXT: and t3, a0, s7
+; RV32IM-NEXT: mul t3, a1, t3
+; RV32IM-NEXT: sw t3, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s9, 1024
+; RV32IM-NEXT: and t3, a0, s9
+; RV32IM-NEXT: mul t3, a1, t3
+; RV32IM-NEXT: sw t3, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s11, 2048
+; RV32IM-NEXT: and t3, a0, s11
+; RV32IM-NEXT: mul t3, a1, t3
+; RV32IM-NEXT: sw t3, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s2, 16384
+; RV32IM-NEXT: and t3, a0, s2
+; RV32IM-NEXT: mul t3, a1, t3
+; RV32IM-NEXT: lui t6, 32768
+; RV32IM-NEXT: and t4, a0, t6
+; RV32IM-NEXT: mul t4, a1, t4
+; RV32IM-NEXT: sw t4, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t5, 65536
+; RV32IM-NEXT: and t4, a0, t5
+; RV32IM-NEXT: mul t4, a1, t4
+; RV32IM-NEXT: sw t4, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui ra, 131072
+; RV32IM-NEXT: and t4, a0, ra
+; RV32IM-NEXT: mul t4, a1, t4
+; RV32IM-NEXT: sw t4, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s0, 262144
+; RV32IM-NEXT: and t4, a0, s0
+; RV32IM-NEXT: mul t4, a1, t4
+; RV32IM-NEXT: sw t4, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s1, 524288
+; RV32IM-NEXT: and t4, a0, s1
+; RV32IM-NEXT: mul t4, a1, t4
+; RV32IM-NEXT: sw t4, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: and t1, t1, a1
+; RV32IM-NEXT: sw t1, 504(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a1, a1, 10
+; RV32IM-NEXT: andi a0, a0, 1024
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: and a0, a0, a1
; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 16384
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 32768
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 65536
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 131072
-; RV32IM-NEXT: and a0, s11, a0
+; RV32IM-NEXT: xor a2, a3, a2
+; RV32IM-NEXT: sw a2, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a7, t2
; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, s11, a1
+; RV32IM-NEXT: xor a0, a4, t3
; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 524288
-; RV32IM-NEXT: and s2, s11, a0
-; RV32IM-NEXT: andi s8, s11, 2
-; RV32IM-NEXT: andi a0, s11, 1
-; RV32IM-NEXT: andi a1, s11, 4
-; RV32IM-NEXT: andi a2, s11, 8
-; RV32IM-NEXT: andi a3, s11, 16
-; RV32IM-NEXT: andi a4, s11, 32
-; RV32IM-NEXT: andi a5, s11, 64
-; RV32IM-NEXT: andi a6, s11, 128
-; RV32IM-NEXT: andi a7, s11, 256
-; RV32IM-NEXT: andi t1, s11, 512
-; RV32IM-NEXT: andi s11, s11, 1024
-; RV32IM-NEXT: mul ra, t0, s8
-; RV32IM-NEXT: mul s8, t0, a0
-; RV32IM-NEXT: mul s9, t0, a1
-; RV32IM-NEXT: mul s3, t0, a2
-; RV32IM-NEXT: mul s5, t0, a3
-; RV32IM-NEXT: mul s0, t0, a4
-; RV32IM-NEXT: mul a0, t0, a5
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, t0, a6
-; RV32IM-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t6, t0, a7
-; RV32IM-NEXT: mul t3, t0, t1
-; RV32IM-NEXT: mul a0, t0, s11
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, t0, t2
-; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t0, a0
-; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t1, t0, a0
-; RV32IM-NEXT: mul s1, t0, s1
-; RV32IM-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t0, a0
+; RV32IM-NEXT: andi a0, a6, 2
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a5, 1
+; RV32IM-NEXT: and a0, a0, a1
; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a6, 4
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a5, 2
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a6, 8
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a5, 3
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a6, 16
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a5, 4
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a6, 32
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a5, 5
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a6, 64
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a5, 6
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a6, 128
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a3, a5, 7
+; RV32IM-NEXT: and a0, a0, a3
+; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a6, 256
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a5, 8
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a1, a6, 512
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a4, a5, 9
+; RV32IM-NEXT: and a1, a1, a4
+; RV32IM-NEXT: sw a1, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 2
+; RV32IM-NEXT: and a1, a6, a0
+; RV32IM-NEXT: lui a0, 4
+; RV32IM-NEXT: and a0, a6, a0
+; RV32IM-NEXT: and a2, a6, s3
+; RV32IM-NEXT: and a3, a6, s5
+; RV32IM-NEXT: and a4, a6, s8
+; RV32IM-NEXT: and a7, a6, t0
+; RV32IM-NEXT: and t0, a6, s7
+; RV32IM-NEXT: and t1, a6, s9
+; RV32IM-NEXT: and t2, a6, s11
+; RV32IM-NEXT: and t3, a6, s2
+; RV32IM-NEXT: and t4, a6, t6
+; RV32IM-NEXT: and t5, a6, t5
+; RV32IM-NEXT: and t6, a6, ra
+; RV32IM-NEXT: and s0, a6, s0
+; RV32IM-NEXT: and s1, a6, s1
+; RV32IM-NEXT: andi ra, a6, 1
+; RV32IM-NEXT: seqz ra, ra
+; RV32IM-NEXT: mul a1, a5, a1
+; RV32IM-NEXT: mul s5, a5, a0
+; RV32IM-NEXT: mul s9, a5, a2
+; RV32IM-NEXT: mul a0, a5, a3
+; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a5, a4
+; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a5, a7
+; RV32IM-NEXT: mul s3, a5, t0
+; RV32IM-NEXT: mul s8, a5, t1
+; RV32IM-NEXT: mul s11, a5, t2
+; RV32IM-NEXT: mul t3, a5, t3
+; RV32IM-NEXT: mul s2, a5, t4
+; RV32IM-NEXT: mul s7, a5, t5
+; RV32IM-NEXT: mul t6, a5, t6
+; RV32IM-NEXT: mul s0, a5, s0
+; RV32IM-NEXT: mul a0, a5, s1
+; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi ra, ra, -1
+; RV32IM-NEXT: and ra, ra, a5
+; RV32IM-NEXT: slli a5, a5, 10
+; RV32IM-NEXT: andi a6, a6, 1024
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: and t5, a6, a5
+; RV32IM-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, a0, a1
+; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, a0, a7
+; RV32IM-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a0, t3
+; RV32IM-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a1, a0
+; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a0, a1
+; RV32IM-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a0, a1
; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t0, a0
-; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t0, a0
-; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, t0, s6
-; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s6, t0, s7
-; RV32IM-NEXT: mul s7, t0, s10
-; RV32IM-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s10, t0, a0
-; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t0, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t0, a0
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, a0, a1
+; RV32IM-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a0
; RV32IM-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a7, t0, a0
-; RV32IM-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a6, t0, a0
+; RV32IM-NEXT: lw a4, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a0
; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t2, t0, a0
-; RV32IM-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s11, t0, a0
-; RV32IM-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t0, a0
-; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t0, a0
-; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t0, a0
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, t0, s2
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 628(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, a1, a0
-; RV32IM-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a0
+; RV32IM-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, ra, a0
+; RV32IM-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: lw a1, 616(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 468(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 604(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a3, 600(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a3
-; RV32IM-NEXT: lw a3, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a4
-; RV32IM-NEXT: lw a4, 584(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a5, 580(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: lw a5, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw t0, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, s1
+; RV32IM-NEXT: xor t4, t4, s5
+; RV32IM-NEXT: xor t1, t1, s3
+; RV32IM-NEXT: xor t2, t2, s2
+; RV32IM-NEXT: xor a6, t3, a6
+; RV32IM-NEXT: lw t3, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, t3
+; RV32IM-NEXT: lw t3, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, t3
+; RV32IM-NEXT: lw t3, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, t3
+; RV32IM-NEXT: lw t3, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, t3
+; RV32IM-NEXT: lw t3, 520(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, t3
+; RV32IM-NEXT: xor a0, ra, a0
+; RV32IM-NEXT: lw t3, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, t3
+; RV32IM-NEXT: lw t3, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, t3
+; RV32IM-NEXT: xor t3, t4, s9
+; RV32IM-NEXT: xor t1, t1, s8
+; RV32IM-NEXT: xor t2, t2, s7
+; RV32IM-NEXT: xor a6, a6, a7
+; RV32IM-NEXT: lw a7, 512(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t0, a7
+; RV32IM-NEXT: lw t0, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, t0
+; RV32IM-NEXT: lw t0, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, t0
+; RV32IM-NEXT: lw t0, 532(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a5, a5, t0
-; RV32IM-NEXT: xor s8, s8, ra
-; RV32IM-NEXT: xor s3, s9, s3
-; RV32IM-NEXT: xor s0, s5, s0
-; RV32IM-NEXT: xor t3, t6, t3
-; RV32IM-NEXT: xor t1, t1, s1
-; RV32IM-NEXT: xor t6, s6, s7
-; RV32IM-NEXT: xor a6, a7, a6
-; RV32IM-NEXT: xor a0, s2, a0
-; RV32IM-NEXT: lw a7, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a7
-; RV32IM-NEXT: lw a7, 644(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a7
-; RV32IM-NEXT: lw a7, 636(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: xor a1, a2, t5
+; RV32IM-NEXT: lw a2, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, t3, a2
+; RV32IM-NEXT: xor t0, t1, s11
+; RV32IM-NEXT: xor t1, t2, t6
+; RV32IM-NEXT: xor a6, a6, a7
+; RV32IM-NEXT: lw a7, 572(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a3, a3, a7
-; RV32IM-NEXT: lw a7, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a7
-; RV32IM-NEXT: lw a7, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a7, 536(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a5, a5, a7
-; RV32IM-NEXT: xor a7, s8, s3
-; RV32IM-NEXT: lw t0, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, s0, t0
-; RV32IM-NEXT: lw t0, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t3, t0
-; RV32IM-NEXT: lw t0, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, t0
-; RV32IM-NEXT: xor t6, t6, s10
-; RV32IM-NEXT: xor a6, a6, t2
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: lw a2, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: lw a3, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a4, a3
-; RV32IM-NEXT: lw a4, 640(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a5, a4
-; RV32IM-NEXT: xor a5, a7, s0
-; RV32IM-NEXT: lw a7, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, t3, a7
-; RV32IM-NEXT: lw t0, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, t0
-; RV32IM-NEXT: lw t0, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t6, t0
-; RV32IM-NEXT: xor a6, a6, s11
-; RV32IM-NEXT: lw t3, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, t3
-; RV32IM-NEXT: lw t3, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, t3
-; RV32IM-NEXT: lw t3, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t3
-; RV32IM-NEXT: lw t3, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t3
-; RV32IM-NEXT: lw t3, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, t3
-; RV32IM-NEXT: lw t0, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, t0
-; RV32IM-NEXT: lw t0, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, t0
-; RV32IM-NEXT: lw t0, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, t0
-; RV32IM-NEXT: lw t0, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t2, t0
-; RV32IM-NEXT: lw t0, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, t0
-; RV32IM-NEXT: lw t3, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t3
-; RV32IM-NEXT: lw t3, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, t3
-; RV32IM-NEXT: lw t0, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, t0
-; RV32IM-NEXT: lw t0, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, t0
-; RV32IM-NEXT: xor a1, a0, a1
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a4, a2
-; RV32IM-NEXT: xor a4, a5, a7
-; RV32IM-NEXT: xor a4, a4, t1
+; RV32IM-NEXT: xor a2, t1, s0
; RV32IM-NEXT: lw a7, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: xor a1, a1, a3
-; RV32IM-NEXT: lw a3, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a3
-; RV32IM-NEXT: xor a3, a4, t2
-; RV32IM-NEXT: lw a4, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a6, a4
-; RV32IM-NEXT: lw t0, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a6, a1, t0
-; RV32IM-NEXT: srli a7, a1, 8
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: and a2, a3, t0
-; RV32IM-NEXT: xor a4, a3, a4
-; RV32IM-NEXT: srli a3, a3, 8
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: lw a7, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a7
+; RV32IM-NEXT: xor a3, a6, a3
+; RV32IM-NEXT: xor a1, a0, a1
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: xor a1, a1, t0
+; RV32IM-NEXT: lw t0, 624(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a4, a3, t0
+; RV32IM-NEXT: srli a7, a3, 8
+; RV32IM-NEXT: xor a3, a3, a5
+; RV32IM-NEXT: and a5, a1, t0
+; RV32IM-NEXT: xor a2, a1, a2
+; RV32IM-NEXT: srli a1, a1, 8
; RV32IM-NEXT: and a7, a7, t0
-; RV32IM-NEXT: and a3, a3, t0
+; RV32IM-NEXT: and a1, a1, t0
+; RV32IM-NEXT: slli a6, a6, 24
+; RV32IM-NEXT: slli a4, a4, 8
+; RV32IM-NEXT: or a4, a6, a4
+; RV32IM-NEXT: srli a3, a3, 24
+; RV32IM-NEXT: or a3, a7, a3
; RV32IM-NEXT: slli a0, a0, 24
-; RV32IM-NEXT: slli a6, a6, 8
-; RV32IM-NEXT: or a0, a0, a6
-; RV32IM-NEXT: srli a1, a1, 24
-; RV32IM-NEXT: or a1, a7, a1
-; RV32IM-NEXT: slli a5, a5, 24
-; RV32IM-NEXT: slli a2, a2, 8
-; RV32IM-NEXT: or a2, a5, a2
-; RV32IM-NEXT: srli a4, a4, 24
-; RV32IM-NEXT: or a3, a3, a4
+; RV32IM-NEXT: slli a5, a5, 8
+; RV32IM-NEXT: or a0, a0, a5
+; RV32IM-NEXT: srli a2, a2, 24
+; RV32IM-NEXT: or a1, a1, a2
+; RV32IM-NEXT: or a3, a4, a3
; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: or a2, a2, a3
-; RV32IM-NEXT: srli a1, a0, 4
+; RV32IM-NEXT: srli a1, a3, 4
+; RV32IM-NEXT: lw a4, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a2, a3, a4
+; RV32IM-NEXT: srli a3, a0, 4
+; RV32IM-NEXT: and a0, a0, a4
+; RV32IM-NEXT: and a1, a1, a4
+; RV32IM-NEXT: and a3, a3, a4
+; RV32IM-NEXT: slli a2, a2, 4
+; RV32IM-NEXT: slli a0, a0, 4
+; RV32IM-NEXT: or a1, a1, a2
+; RV32IM-NEXT: or a0, a3, a0
+; RV32IM-NEXT: srli a2, a1, 2
+; RV32IM-NEXT: and a1, a1, s4
+; RV32IM-NEXT: srli a3, a0, 2
; RV32IM-NEXT: and a0, a0, s4
-; RV32IM-NEXT: srli a3, a2, 4
; RV32IM-NEXT: and a2, a2, s4
-; RV32IM-NEXT: and a1, a1, s4
; RV32IM-NEXT: and a3, a3, s4
-; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: slli a2, a2, 4
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: or a2, a3, a2
-; RV32IM-NEXT: srli a1, a0, 2
-; RV32IM-NEXT: and a0, a0, t5
-; RV32IM-NEXT: srli a3, a2, 2
-; RV32IM-NEXT: and a2, a2, t5
-; RV32IM-NEXT: and a1, a1, t5
-; RV32IM-NEXT: and a3, a3, t5
+; RV32IM-NEXT: slli a1, a1, 2
+; RV32IM-NEXT: or a1, a2, a1
+; RV32IM-NEXT: lui a2, 349525
+; RV32IM-NEXT: addi a2, a2, 1364
; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: lui a1, 349525
-; RV32IM-NEXT: addi a1, a1, 1364
-; RV32IM-NEXT: slli a2, a2, 2
-; RV32IM-NEXT: or a2, a3, a2
-; RV32IM-NEXT: srli a3, a0, 1
-; RV32IM-NEXT: and a0, a0, t4
-; RV32IM-NEXT: and a4, a2, t4
-; RV32IM-NEXT: srli a2, a2, 1
-; RV32IM-NEXT: and a3, a3, a1
-; RV32IM-NEXT: and a1, a2, a1
-; RV32IM-NEXT: slli a0, a0, 1
; RV32IM-NEXT: or a0, a3, a0
-; RV32IM-NEXT: slli a4, a4, 1
-; RV32IM-NEXT: or a1, a1, a4
+; RV32IM-NEXT: srli a3, a1, 1
+; RV32IM-NEXT: and a1, a1, s6
+; RV32IM-NEXT: and a4, a0, s6
; RV32IM-NEXT: srli a0, a0, 1
-; RV32IM-NEXT: lw a2, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a2
+; RV32IM-NEXT: and a3, a3, a2
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: slli a1, a1, 1
+; RV32IM-NEXT: or a1, a3, a1
+; RV32IM-NEXT: slli a4, a4, 1
+; RV32IM-NEXT: or a0, a0, a4
; RV32IM-NEXT: srli a1, a1, 1
-; RV32IM-NEXT: lw a2, 712(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 604(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 680(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 716(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a0, a0, 1
+; RV32IM-NEXT: lw a2, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a2
+; RV32IM-NEXT: lw a2, 628(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a4, 612(sp) # 4-byte Folded Reload
; RV32IM-NEXT: sw a4, 0(a2)
-; RV32IM-NEXT: sw a0, 4(a2)
-; RV32IM-NEXT: lw a3, 720(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a1, 4(a2)
+; RV32IM-NEXT: lw a3, 616(sp) # 4-byte Folded Reload
; RV32IM-NEXT: sw a3, 8(a2)
-; RV32IM-NEXT: sw a1, 12(a2)
-; RV32IM-NEXT: lw a2, 684(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 12(a2)
+; RV32IM-NEXT: lw a2, 632(sp) # 4-byte Folded Reload
; RV32IM-NEXT: sw a4, 0(a2)
-; RV32IM-NEXT: sw a0, 4(a2)
+; RV32IM-NEXT: sw a1, 4(a2)
; RV32IM-NEXT: sw a3, 8(a2)
-; RV32IM-NEXT: sw a1, 12(a2)
-; RV32IM-NEXT: lw ra, 780(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s0, 776(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 772(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s2, 768(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s3, 764(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s4, 760(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s5, 756(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 752(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 748(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s8, 744(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s9, 740(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s10, 736(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s11, 732(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 784
+; RV32IM-NEXT: sw a0, 12(a2)
+; RV32IM-NEXT: lw ra, 684(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 680(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 676(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s2, 672(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s3, 668(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s4, 664(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 660(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s6, 656(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s7, 652(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s8, 648(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s9, 644(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 640(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s11, 636(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 688
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: commutative_clmul_v2i64:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: addi sp, sp, -960
-; RV64IM-NEXT: sd ra, 952(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s0, 944(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s1, 936(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s2, 928(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s3, 920(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s4, 912(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s5, 904(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s6, 896(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s7, 888(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s8, 880(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s9, 872(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s10, 864(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s11, 856(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd a5, 744(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd a4, 736(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv t2, a3
-; RV64IM-NEXT: mv a3, a2
-; RV64IM-NEXT: andi t5, a2, 2
-; RV64IM-NEXT: andi s0, a2, 1
-; RV64IM-NEXT: andi a7, a2, 4
-; RV64IM-NEXT: andi t1, a2, 8
-; RV64IM-NEXT: andi a2, a2, 16
-; RV64IM-NEXT: andi t6, a3, 32
-; RV64IM-NEXT: andi a5, a3, 64
-; RV64IM-NEXT: andi t0, a3, 128
-; RV64IM-NEXT: andi t3, a3, 256
-; RV64IM-NEXT: andi a6, a3, 512
-; RV64IM-NEXT: andi t4, a3, 1024
-; RV64IM-NEXT: li a4, 1
-; RV64IM-NEXT: lui s6, 4
-; RV64IM-NEXT: lui s7, 8
-; RV64IM-NEXT: lui s8, 128
-; RV64IM-NEXT: lui s9, 256
-; RV64IM-NEXT: lui s10, 8192
-; RV64IM-NEXT: lui s11, 16384
-; RV64IM-NEXT: andi s1, t2, 2
-; RV64IM-NEXT: andi s2, t2, 1
-; RV64IM-NEXT: andi s3, t2, 4
-; RV64IM-NEXT: andi s4, t2, 8
-; RV64IM-NEXT: mul t5, a0, t5
-; RV64IM-NEXT: mul s0, a0, s0
-; RV64IM-NEXT: xor t5, s0, t5
-; RV64IM-NEXT: andi s0, t2, 16
-; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: mul t1, a0, t1
-; RV64IM-NEXT: xor a7, a7, t1
-; RV64IM-NEXT: andi t1, t2, 32
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: mul t6, a0, t6
-; RV64IM-NEXT: xor a2, a2, t6
-; RV64IM-NEXT: andi t6, t2, 128
-; RV64IM-NEXT: mul t0, a0, t0
-; RV64IM-NEXT: mul t3, a0, t3
-; RV64IM-NEXT: xor t0, t0, t3
-; RV64IM-NEXT: andi t3, t2, 256
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: mul s5, a0, a6
-; RV64IM-NEXT: mul a6, a0, t4
-; RV64IM-NEXT: sd a6, 728(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a6, a1, s1
-; RV64IM-NEXT: mul t4, a1, s2
-; RV64IM-NEXT: xor a6, t4, a6
-; RV64IM-NEXT: sd a6, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a6, a4, 11
-; RV64IM-NEXT: sd a6, 760(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul t4, a1, s3
-; RV64IM-NEXT: mul s1, a1, s4
-; RV64IM-NEXT: xor a6, t4, s1
-; RV64IM-NEXT: sd a6, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t4, a3, s6
-; RV64IM-NEXT: mul s0, a1, s0
-; RV64IM-NEXT: mul t1, a1, t1
-; RV64IM-NEXT: xor a6, s0, t1
-; RV64IM-NEXT: sd a6, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t1, a3, s7
-; RV64IM-NEXT: mul t6, a1, t6
-; RV64IM-NEXT: mul t3, a1, t3
-; RV64IM-NEXT: xor a6, t6, t3
-; RV64IM-NEXT: sd a6, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t3, a3, s8
-; RV64IM-NEXT: xor a6, t5, a7
-; RV64IM-NEXT: sd a6, 720(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a7, a3, s9
-; RV64IM-NEXT: xor a5, a2, a5
-; RV64IM-NEXT: sd a5, 712(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a5, a3, s10
-; RV64IM-NEXT: xor a6, t0, s5
-; RV64IM-NEXT: sd a6, 704(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t0, a3, s11
-; RV64IM-NEXT: mul a2, a0, t4
-; RV64IM-NEXT: mul t1, a0, t1
-; RV64IM-NEXT: xor a6, a2, t1
-; RV64IM-NEXT: sd a6, 696(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 31
-; RV64IM-NEXT: sd a2, 752(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul t1, a0, t3
+; RV64IM-NEXT: addi sp, sp, -1104
+; RV64IM-NEXT: sd ra, 1096(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s0, 1088(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 1080(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s2, 1072(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s3, 1064(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s4, 1056(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s5, 1048(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s6, 1040(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s7, 1032(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s8, 1024(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s9, 1016(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s10, 1008(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s11, 1000(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a5, 872(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a4, 864(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s9, a0, 1
+; RV64IM-NEXT: andi s10, a2, 2
+; RV64IM-NEXT: andi a4, a2, 1
+; RV64IM-NEXT: slli s5, a0, 2
+; RV64IM-NEXT: andi s7, a2, 4
+; RV64IM-NEXT: slli s3, a0, 3
+; RV64IM-NEXT: andi s6, a2, 8
+; RV64IM-NEXT: slli t0, a0, 4
+; RV64IM-NEXT: andi t4, a2, 16
+; RV64IM-NEXT: slli a6, a0, 5
+; RV64IM-NEXT: andi t2, a2, 32
+; RV64IM-NEXT: slli a7, a0, 6
+; RV64IM-NEXT: andi t6, a2, 64
+; RV64IM-NEXT: slli t1, a0, 7
+; RV64IM-NEXT: andi s1, a2, 128
+; RV64IM-NEXT: slli t3, a0, 8
+; RV64IM-NEXT: andi s2, a2, 256
+; RV64IM-NEXT: slli t5, a0, 9
+; RV64IM-NEXT: andi s4, a2, 512
+; RV64IM-NEXT: slli s0, a0, 10
+; RV64IM-NEXT: andi s8, a2, 1024
+; RV64IM-NEXT: li a5, 1
+; RV64IM-NEXT: seqz s10, s10
+; RV64IM-NEXT: addi s10, s10, -1
+; RV64IM-NEXT: and s9, s10, s9
+; RV64IM-NEXT: sd s9, 856(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s9, 16
+; RV64IM-NEXT: seqz s7, s7
+; RV64IM-NEXT: addi s7, s7, -1
+; RV64IM-NEXT: and s5, s7, s5
+; RV64IM-NEXT: sd s5, 848(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s7, 32
+; RV64IM-NEXT: seqz s5, s6
+; RV64IM-NEXT: addi s5, s5, -1
+; RV64IM-NEXT: and s3, s5, s3
+; RV64IM-NEXT: sd s3, 840(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s5, 2048
+; RV64IM-NEXT: seqz t4, t4
+; RV64IM-NEXT: addi t4, t4, -1
+; RV64IM-NEXT: and t0, t4, t0
+; RV64IM-NEXT: sd t0, 816(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s6, 4096
+; RV64IM-NEXT: seqz a4, a4
+; RV64IM-NEXT: seqz t0, t2
+; RV64IM-NEXT: seqz t2, t6
+; RV64IM-NEXT: seqz t4, s1
+; RV64IM-NEXT: seqz t6, s2
+; RV64IM-NEXT: seqz s1, s4
+; RV64IM-NEXT: seqz s2, s8
+; RV64IM-NEXT: addi t0, t0, -1
+; RV64IM-NEXT: and a6, t0, a6
+; RV64IM-NEXT: sd a6, 800(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 11
+; RV64IM-NEXT: sd a6, 992(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: addi t2, t2, -1
+; RV64IM-NEXT: and a6, t2, a7
+; RV64IM-NEXT: sd a6, 824(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a6, a2, s9
+; RV64IM-NEXT: addi t4, t4, -1
+; RV64IM-NEXT: and a7, t4, t1
+; RV64IM-NEXT: sd a7, 792(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, a2, s7
+; RV64IM-NEXT: addi t6, t6, -1
+; RV64IM-NEXT: and t0, t6, t3
+; RV64IM-NEXT: sd t0, 784(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t0, a2, s5
+; RV64IM-NEXT: addi s1, s1, -1
+; RV64IM-NEXT: and t1, s1, t5
+; RV64IM-NEXT: sd t1, 808(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t1, a2, s6
+; RV64IM-NEXT: addi s2, s2, -1
+; RV64IM-NEXT: and t2, s2, s0
+; RV64IM-NEXT: sd t2, 832(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t2, a5, 31
+; RV64IM-NEXT: sd t2, 984(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a6, a0, a6
; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: xor a6, t1, a7
-; RV64IM-NEXT: sd a6, 688(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t3, a4, 32
-; RV64IM-NEXT: sd t3, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: mul a7, a0, t0
-; RV64IM-NEXT: xor a5, a5, a7
-; RV64IM-NEXT: sd a5, 680(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s2, a4, 33
-; RV64IM-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s4, a4, 34
-; RV64IM-NEXT: sd s4, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s7, a4, 35
-; RV64IM-NEXT: sd s7, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s11, a4, 36
-; RV64IM-NEXT: sd s11, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a5, a4, 37
-; RV64IM-NEXT: sd a5, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a6, a4, 38
-; RV64IM-NEXT: sd a6, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a7, a4, 39
-; RV64IM-NEXT: sd a7, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t0, a4, 40
-; RV64IM-NEXT: sd t0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t4, a4, 41
+; RV64IM-NEXT: xor a6, a6, a7
+; RV64IM-NEXT: sd a6, 776(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 32
+; RV64IM-NEXT: sd a6, 976(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a6, a0, t0
+; RV64IM-NEXT: mul a7, a0, t1
+; RV64IM-NEXT: xor a6, a6, a7
+; RV64IM-NEXT: sd a6, 768(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t0, a5, 33
+; RV64IM-NEXT: sd t0, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t2, a5, 34
+; RV64IM-NEXT: sd t2, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t3, a5, 35
+; RV64IM-NEXT: sd t3, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t4, a5, 36
; RV64IM-NEXT: sd t4, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t5, a4, 42
+; RV64IM-NEXT: slli t5, a5, 37
; RV64IM-NEXT: sd t5, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t6, a4, 43
+; RV64IM-NEXT: slli t6, a5, 38
; RV64IM-NEXT: sd t6, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s0, a4, 44
+; RV64IM-NEXT: slli s0, a5, 39
; RV64IM-NEXT: sd s0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s1, a4, 45
+; RV64IM-NEXT: slli s1, a5, 40
; RV64IM-NEXT: sd s1, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s3, a4, 46
-; RV64IM-NEXT: sd s3, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s5, a4, 47
-; RV64IM-NEXT: sd s5, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s6, a4, 48
-; RV64IM-NEXT: sd s6, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s8, a4, 49
-; RV64IM-NEXT: sd s8, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s9, a4, 50
-; RV64IM-NEXT: sd s9, 240(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s10, a4, 51
-; RV64IM-NEXT: sd s10, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli ra, a4, 52
-; RV64IM-NEXT: sd ra, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 53
-; RV64IM-NEXT: sd a2, 768(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 54
-; RV64IM-NEXT: sd a2, 776(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 55
-; RV64IM-NEXT: sd a2, 784(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 56
-; RV64IM-NEXT: sd a2, 792(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 57
-; RV64IM-NEXT: sd a2, 800(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 58
-; RV64IM-NEXT: sd a2, 808(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 59
-; RV64IM-NEXT: sd a2, 816(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 60
-; RV64IM-NEXT: sd a2, 824(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 61
-; RV64IM-NEXT: sd a2, 840(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a4, a4, 62
-; RV64IM-NEXT: sd a4, 832(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: li a4, -1
-; RV64IM-NEXT: slli a4, a4, 63
-; RV64IM-NEXT: sd a4, 848(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 1
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 592(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 2
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 640(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 16
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 576(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 32
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 64
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: slli s2, a5, 41
+; RV64IM-NEXT: sd s2, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s3, a5, 42
+; RV64IM-NEXT: sd s3, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s4, a5, 43
+; RV64IM-NEXT: sd s4, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s5, a5, 44
+; RV64IM-NEXT: sd s5, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s6, a5, 45
+; RV64IM-NEXT: sd s6, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s7, a5, 46
+; RV64IM-NEXT: sd s7, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s8, a5, 47
+; RV64IM-NEXT: sd s8, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s9, a5, 48
+; RV64IM-NEXT: sd s9, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s10, a5, 49
+; RV64IM-NEXT: sd s10, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s11, a5, 50
+; RV64IM-NEXT: sd s11, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli ra, a5, 51
+; RV64IM-NEXT: sd ra, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 52
+; RV64IM-NEXT: sd a6, 968(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 53
+; RV64IM-NEXT: sd a6, 960(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 54
+; RV64IM-NEXT: sd a6, 952(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 55
+; RV64IM-NEXT: sd a6, 944(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 56
+; RV64IM-NEXT: sd a6, 936(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 57
+; RV64IM-NEXT: sd a6, 928(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 58
+; RV64IM-NEXT: sd a6, 920(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 59
+; RV64IM-NEXT: sd a6, 912(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 60
+; RV64IM-NEXT: sd a6, 904(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 61
+; RV64IM-NEXT: sd a6, 896(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a5, a5, 62
+; RV64IM-NEXT: sd a5, 888(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: li a5, -1
+; RV64IM-NEXT: slli a5, a5, 63
+; RV64IM-NEXT: sd a5, 880(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: addi a4, a4, -1
+; RV64IM-NEXT: sd a4, 688(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 1
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 624(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 2
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 4
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 720(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 8
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 752(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 64
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 584(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 128
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
; RV64IM-NEXT: sd a4, 664(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 512
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 1024
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 616(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 2048
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 648(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 4096
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 672(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 32768
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 472(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 65536
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: lui a5, 256
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 712(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 512
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 736(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 1024
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 760(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 8192
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
; RV64IM-NEXT: sd a4, 544(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 131072
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 624(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 262144
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: lui a5, 16384
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 600(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 32768
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 672(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 65536
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 704(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 131072
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 728(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 262144
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 744(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a5, 992(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: ld a4, 984(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, a2, a4
+; RV64IM-NEXT: ld a4, 976(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a7, a2, a4
+; RV64IM-NEXT: and t0, a2, t0
+; RV64IM-NEXT: and t1, a2, t2
+; RV64IM-NEXT: and t2, a2, t3
+; RV64IM-NEXT: and t3, a2, t4
+; RV64IM-NEXT: and t4, a2, t5
+; RV64IM-NEXT: and t5, a2, t6
+; RV64IM-NEXT: and t6, a2, s0
+; RV64IM-NEXT: and s0, a2, s1
+; RV64IM-NEXT: and s1, a2, s2
+; RV64IM-NEXT: and s2, a2, s3
+; RV64IM-NEXT: and s3, a2, s4
+; RV64IM-NEXT: and s4, a2, s5
+; RV64IM-NEXT: and s5, a2, s6
+; RV64IM-NEXT: and s6, a2, s7
+; RV64IM-NEXT: and s7, a2, s8
+; RV64IM-NEXT: and s8, a2, s9
+; RV64IM-NEXT: and s9, a2, s10
+; RV64IM-NEXT: and s10, a2, s11
+; RV64IM-NEXT: and s11, a2, ra
+; RV64IM-NEXT: ld a4, 968(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and ra, a2, a4
+; RV64IM-NEXT: ld a4, 960(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
; RV64IM-NEXT: sd a4, 656(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a4, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: ld a2, 752(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t1, a3, a2
-; RV64IM-NEXT: and a2, a3, t3
-; RV64IM-NEXT: sd a2, 608(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t3, a3, s2
-; RV64IM-NEXT: and s2, a3, s4
-; RV64IM-NEXT: and s4, a3, s7
-; RV64IM-NEXT: and s7, a3, s11
-; RV64IM-NEXT: and s11, a3, a5
-; RV64IM-NEXT: and a5, a3, a6
-; RV64IM-NEXT: and a6, a3, a7
-; RV64IM-NEXT: and a7, a3, t0
-; RV64IM-NEXT: and t0, a3, t4
-; RV64IM-NEXT: and t4, a3, t5
-; RV64IM-NEXT: and t5, a3, t6
-; RV64IM-NEXT: and t6, a3, s0
-; RV64IM-NEXT: and s0, a3, s1
-; RV64IM-NEXT: and s1, a3, s3
-; RV64IM-NEXT: and s3, a3, s5
-; RV64IM-NEXT: and s5, a3, s6
-; RV64IM-NEXT: and s6, a3, s8
-; RV64IM-NEXT: and s8, a3, s9
-; RV64IM-NEXT: and s9, a3, s10
-; RV64IM-NEXT: and s10, a3, ra
-; RV64IM-NEXT: ld a2, 768(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and ra, a3, a2
-; RV64IM-NEXT: ld a2, 776(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 600(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 784(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 512(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 792(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 496(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 800(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 456(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 808(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 816(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 824(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 840(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 832(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 848(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: mul a3, a0, a4
-; RV64IM-NEXT: sd a3, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, t1
-; RV64IM-NEXT: sd a4, 568(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 608(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: sd a3, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, t3
-; RV64IM-NEXT: sd a3, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, s2
-; RV64IM-NEXT: sd a3, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s4
-; RV64IM-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s7
-; RV64IM-NEXT: sd a4, 488(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s11
-; RV64IM-NEXT: sd a4, 536(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 952(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 936(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 928(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 496(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 920(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 912(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 904(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 896(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 888(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 880(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a2, a2, a4
+; RV64IM-NEXT: ld a4, 688(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a4, a0
+; RV64IM-NEXT: sd a4, 424(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a4, a0, a5
-; RV64IM-NEXT: sd a4, 584(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, a6
-; RV64IM-NEXT: sd a3, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, a7
-; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, t0
-; RV64IM-NEXT: sd a3, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, t4
; RV64IM-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, t5
-; RV64IM-NEXT: sd a4, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, a6
+; RV64IM-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, a7
+; RV64IM-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, t0
+; RV64IM-NEXT: sd a4, 480(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, t1
+; RV64IM-NEXT: sd a5, 512(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, t2
+; RV64IM-NEXT: sd a5, 560(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, t3
+; RV64IM-NEXT: sd a5, 592(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, t4
+; RV64IM-NEXT: sd a5, 640(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, t5
+; RV64IM-NEXT: sd a5, 688(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a4, a0, t6
-; RV64IM-NEXT: sd a4, 504(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a4, 408(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a4, a0, s0
-; RV64IM-NEXT: sd a4, 552(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a4, 400(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a4, a0, s1
-; RV64IM-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, s3
-; RV64IM-NEXT: sd a3, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, s5
-; RV64IM-NEXT: sd a3, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, s6
-; RV64IM-NEXT: sd a3, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a4, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, s2
+; RV64IM-NEXT: sd a4, 504(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, s3
+; RV64IM-NEXT: sd a5, 536(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, s4
+; RV64IM-NEXT: sd a5, 568(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, s5
+; RV64IM-NEXT: sd a5, 616(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, s6
+; RV64IM-NEXT: sd a5, 648(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, s7
+; RV64IM-NEXT: sd a5, 680(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a4, a0, s8
-; RV64IM-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a4, a0, s9
-; RV64IM-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a4, a0, s10
-; RV64IM-NEXT: sd a4, 480(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, ra
-; RV64IM-NEXT: sd a4, 520(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 600(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a3
-; RV64IM-NEXT: sd a4, 560(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 512(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a3
-; RV64IM-NEXT: sd a4, 600(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 496(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: sd a3, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 456(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: sd a3, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: sd a3, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: sd a3, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a3
-; RV64IM-NEXT: sd a4, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a3
; RV64IM-NEXT: sd a4, 456(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a3
+; RV64IM-NEXT: mul a4, a0, s11
+; RV64IM-NEXT: sd a4, 488(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, ra
+; RV64IM-NEXT: sd a5, 520(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 656(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: sd a4, 552(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 632(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: sd a4, 576(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 608(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 528(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 496(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: sd a4, 656(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: sd a4, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: sd a4, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: sd a4, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a4, a0, a4
; RV64IM-NEXT: sd a4, 496(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a0, a0, a2
-; RV64IM-NEXT: sd a0, 512(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a0, a2
+; RV64IM-NEXT: sd a0, 528(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 2
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 1
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 4
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 2
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 8
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 3
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 16
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 4
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 32
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 5
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 64
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 6
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 128
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 7
+; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a0, t2, 64
-; RV64IM-NEXT: mul a0, a1, a0
-; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a2, a0
-; RV64IM-NEXT: sd a0, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a0, t2, 512
-; RV64IM-NEXT: mul a0, a1, a0
-; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a2, a0
-; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a0, 4
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: lui a2, 8
-; RV64IM-NEXT: and a2, t2, a2
-; RV64IM-NEXT: mul a0, a1, a0
-; RV64IM-NEXT: mul a2, a1, a2
-; RV64IM-NEXT: xor a0, a0, a2
+; RV64IM-NEXT: andi a0, a3, 256
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 8
+; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a0, 128
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: lui a2, 256
-; RV64IM-NEXT: and a2, t2, a2
+; RV64IM-NEXT: andi a0, a3, 512
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 9
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 1024
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 10
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a0, 16
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: lui a2, 32
+; RV64IM-NEXT: and a2, a3, a2
; RV64IM-NEXT: mul a0, a1, a0
; RV64IM-NEXT: mul a2, a1, a2
; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a0, 8192
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: lui a2, 16384
-; RV64IM-NEXT: and a2, t2, a2
+; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a0, 2048
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: lui a2, 4096
+; RV64IM-NEXT: and a2, a3, a2
; RV64IM-NEXT: mul a0, a1, a0
; RV64IM-NEXT: mul a2, a1, a2
; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: sd a0, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a0, 1
-; RV64IM-NEXT: and a5, t2, a0
+; RV64IM-NEXT: and t4, a3, a0
; RV64IM-NEXT: lui a0, 2
-; RV64IM-NEXT: and a7, t2, a0
-; RV64IM-NEXT: lui a0, 16
-; RV64IM-NEXT: and t4, t2, a0
-; RV64IM-NEXT: lui a0, 32
-; RV64IM-NEXT: and a6, t2, a0
+; RV64IM-NEXT: and a7, a3, a0
+; RV64IM-NEXT: lui a0, 4
+; RV64IM-NEXT: and t5, a3, a0
+; RV64IM-NEXT: lui a0, 8
+; RV64IM-NEXT: and s0, a3, a0
; RV64IM-NEXT: lui a0, 64
-; RV64IM-NEXT: and t5, t2, a0
+; RV64IM-NEXT: and s2, a3, a0
+; RV64IM-NEXT: lui a0, 128
+; RV64IM-NEXT: and s4, a3, a0
+; RV64IM-NEXT: lui a0, 256
+; RV64IM-NEXT: and s3, a3, a0
; RV64IM-NEXT: lui a0, 512
-; RV64IM-NEXT: and t6, t2, a0
+; RV64IM-NEXT: and s6, a3, a0
; RV64IM-NEXT: lui a0, 1024
-; RV64IM-NEXT: and s0, t2, a0
-; RV64IM-NEXT: lui a0, 2048
-; RV64IM-NEXT: and s1, t2, a0
-; RV64IM-NEXT: lui a0, 4096
-; RV64IM-NEXT: and s2, t2, a0
-; RV64IM-NEXT: lui a0, 32768
-; RV64IM-NEXT: and s5, t2, a0
+; RV64IM-NEXT: and s7, a3, a0
+; RV64IM-NEXT: lui a0, 8192
+; RV64IM-NEXT: and s8, a3, a0
+; RV64IM-NEXT: lui a0, 16384
+; RV64IM-NEXT: and s9, a3, a0
+; RV64IM-NEXT: lui a5, 32768
+; RV64IM-NEXT: and a5, a3, a5
; RV64IM-NEXT: lui a0, 65536
-; RV64IM-NEXT: and s10, t2, a0
+; RV64IM-NEXT: and s10, a3, a0
; RV64IM-NEXT: lui a0, 131072
-; RV64IM-NEXT: and t3, t2, a0
+; RV64IM-NEXT: and s11, a3, a0
; RV64IM-NEXT: lui a0, 262144
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 760(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 752(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 752(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: ld a0, 992(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 984(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 984(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 976(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 976(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t0, a3, a0
; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 248(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 768(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and ra, t2, a0
-; RV64IM-NEXT: ld a0, 776(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s11, t2, a0
-; RV64IM-NEXT: ld a0, 784(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s4, t2, a0
-; RV64IM-NEXT: ld a0, 792(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s7, t2, a0
-; RV64IM-NEXT: ld a0, 800(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s6, t2, a0
-; RV64IM-NEXT: ld a0, 808(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s8, t2, a0
-; RV64IM-NEXT: ld a0, 816(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a4, t2, a0
-; RV64IM-NEXT: ld a0, 824(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: ld a2, 840(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, t2, a2
-; RV64IM-NEXT: ld a3, 832(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t1, t2, a3
-; RV64IM-NEXT: ld t0, 848(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t0, t2, t0
-; RV64IM-NEXT: andi a3, t2, 1024
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, a5
-; RV64IM-NEXT: sd a3, 0(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, a7
-; RV64IM-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s9, a1, t4
-; RV64IM-NEXT: mul a3, a1, a6
-; RV64IM-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, t5
-; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s3, a1, t6
-; RV64IM-NEXT: mul a3, a1, s0
-; RV64IM-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, s1
-; RV64IM-NEXT: sd a3, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, s2
-; RV64IM-NEXT: sd a3, 792(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s2, a1, s5
-; RV64IM-NEXT: mul a3, a1, s10
-; RV64IM-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, t3
-; RV64IM-NEXT: sd a3, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 776(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t5, a1, a3
-; RV64IM-NEXT: ld a3, 752(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 824(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s0, a1, a3
-; RV64IM-NEXT: ld a3, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t6, a1, a3
-; RV64IM-NEXT: ld a3, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 968(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 960(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and ra, a3, a0
+; RV64IM-NEXT: ld a0, 952(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, a3, a0
+; RV64IM-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: ld a0, 936(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s5, a3, a0
+; RV64IM-NEXT: ld a0, 928(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t2, a3, a0
+; RV64IM-NEXT: ld s1, 920(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s1, a3, s1
+; RV64IM-NEXT: ld a0, 912(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t6, a3, a0
+; RV64IM-NEXT: ld a2, 904(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a2, a3, a2
+; RV64IM-NEXT: ld a0, 896(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: ld t1, 888(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t3, a3, t1
+; RV64IM-NEXT: ld t1, 880(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t1, a3, t1
+; RV64IM-NEXT: andi a3, a3, 1
+; RV64IM-NEXT: seqz a3, a3
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: mul t4, a1, t4
+; RV64IM-NEXT: sd t4, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, a7
+; RV64IM-NEXT: sd a7, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, t5
+; RV64IM-NEXT: sd a7, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s0
+; RV64IM-NEXT: sd a7, 912(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s2
+; RV64IM-NEXT: sd a7, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s4
+; RV64IM-NEXT: sd a7, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s3
+; RV64IM-NEXT: sd a7, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s6
+; RV64IM-NEXT: sd a7, 888(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s7
+; RV64IM-NEXT: sd a7, 968(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s8
+; RV64IM-NEXT: sd a7, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s9
+; RV64IM-NEXT: sd a7, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a1, a5
+; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a1, s10
+; RV64IM-NEXT: sd a5, 880(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a1, s11
+; RV64IM-NEXT: sd a5, 936(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a5, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a5, a1, a5
+; RV64IM-NEXT: sd a5, 992(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t5, a3, a1
; RV64IM-NEXT: ld a3, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 752(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 800(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul s6, a1, a3
+; RV64IM-NEXT: ld a3, 984(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s8, a1, a3
+; RV64IM-NEXT: ld a3, 976(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s7, a1, a3
; RV64IM-NEXT: ld a3, 136(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 840(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 136(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t4, a1, a3
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 144(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t3, a1, a3
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 256(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s5, a1, a3
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 896(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 168(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 960(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 984(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 760(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul s3, a1, a3
; RV64IM-NEXT: ld a3, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s2, a1, a3
+; RV64IM-NEXT: mul a3, a1, t0
+; RV64IM-NEXT: sd a3, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 216(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 816(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 224(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 200(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 848(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 920(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 952(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 976(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t2, a1, a3
-; RV64IM-NEXT: ld a3, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a1, a3
+; RV64IM-NEXT: mul s0, a1, a3
; RV64IM-NEXT: ld a3, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s1, a1, a3
-; RV64IM-NEXT: ld a3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul t4, a1, a3
+; RV64IM-NEXT: ld a3, 48(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a3, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 288(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a3, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 32(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 208(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a3, a1, ra
-; RV64IM-NEXT: sd a3, 784(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, s11
-; RV64IM-NEXT: sd a3, 808(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, s4
-; RV64IM-NEXT: sd a3, 832(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s7, a1, s7
-; RV64IM-NEXT: mul s6, a1, s6
-; RV64IM-NEXT: mul s11, a1, s8
+; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a3, a1, a6
+; RV64IM-NEXT: sd a3, 288(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a3, a1, a4
-; RV64IM-NEXT: sd a3, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 904(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a3, a1, s5
+; RV64IM-NEXT: sd a3, 928(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a3, a1, t2
+; RV64IM-NEXT: sd a3, 944(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul s1, a1, s1
+; RV64IM-NEXT: mul t6, a1, t6
+; RV64IM-NEXT: mul s4, a1, a2
; RV64IM-NEXT: mul a0, a1, a0
-; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a0, a1, a2
+; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a0, a1, t3
; RV64IM-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a0, a1, t1
-; RV64IM-NEXT: sd a0, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a0, a1, t0
-; RV64IM-NEXT: sd a0, 768(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 720(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 712(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a0, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 856(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t2, a1, a0
+; RV64IM-NEXT: ld a0, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, a0, a1
+; RV64IM-NEXT: ld a0, 816(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 800(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s5, a0, a1
+; RV64IM-NEXT: ld a0, 792(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 784(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor ra, a0, a1
+; RV64IM-NEXT: ld a0, 624(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a1, a0
+; RV64IM-NEXT: ld a0, 776(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 584(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t0, a0, a1
-; RV64IM-NEXT: ld a0, 728(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 704(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, a1, a0
-; RV64IM-NEXT: ld a0, 592(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor ra, a1, a0
-; RV64IM-NEXT: ld a0, 696(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 576(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a0, 768(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 544(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, a0, a1
+; RV64IM-NEXT: ld a0, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a3, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a0, a3
+; RV64IM-NEXT: ld a0, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a0, a4
+; RV64IM-NEXT: ld a0, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a5, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a0, a5
+; RV64IM-NEXT: ld a0, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a0, a1
+; RV64IM-NEXT: ld a0, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t5, a0
+; RV64IM-NEXT: ld a0, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 688(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a2, 528(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a1, a2
-; RV64IM-NEXT: ld a2, 680(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a3, 472(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, a3
-; RV64IM-NEXT: ld a3, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a4, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: ld a4, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a5, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s8, a4, a5
-; RV64IM-NEXT: ld a4, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a5, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, a5
-; RV64IM-NEXT: ld a5, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a6, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, a6
-; RV64IM-NEXT: ld a6, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s10, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, s10
-; RV64IM-NEXT: ld s10, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s10, s10, s4
-; RV64IM-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t5, s4
-; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s9, s4, s9
-; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s3, s4, s3
-; RV64IM-NEXT: ld s4, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s4, s2
-; RV64IM-NEXT: xor t6, s0, t6
-; RV64IM-NEXT: xor t3, t4, t3
-; RV64IM-NEXT: xor a7, t2, a7
-; RV64IM-NEXT: xor t2, s7, s6
-; RV64IM-NEXT: xor t0, t0, t1
-; RV64IM-NEXT: ld t1, 640(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, ra, t1
-; RV64IM-NEXT: ld t4, 632(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a0, t4
-; RV64IM-NEXT: ld t4, 616(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, a1, t4
-; RV64IM-NEXT: ld t4, 544(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, t4
-; RV64IM-NEXT: ld t4, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, t4
-; RV64IM-NEXT: ld t4, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, s8, t4
-; RV64IM-NEXT: ld s0, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, s0
-; RV64IM-NEXT: ld s0, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, s0
-; RV64IM-NEXT: xor a6, a6, s10
-; RV64IM-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t5, s0
-; RV64IM-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s9, s0
-; RV64IM-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s3, s3, s4
-; RV64IM-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s2, s4
-; RV64IM-NEXT: ld s4, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, t6, s4
-; RV64IM-NEXT: xor t3, t3, s5
-; RV64IM-NEXT: xor a7, a7, s1
-; RV64IM-NEXT: xor t2, t2, s11
-; RV64IM-NEXT: xor t0, t0, t1
-; RV64IM-NEXT: ld t1, 664(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a0, t1
-; RV64IM-NEXT: ld t1, 648(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, a1, t1
-; RV64IM-NEXT: ld t1, 624(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, t1
-; RV64IM-NEXT: ld t1, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, t1
-; RV64IM-NEXT: ld t1, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t4, t1
-; RV64IM-NEXT: ld t4, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, t4
-; RV64IM-NEXT: ld t4, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, t4
-; RV64IM-NEXT: xor a6, a6, t5
-; RV64IM-NEXT: ld t4, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, s11
+; RV64IM-NEXT: ld s10, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s6, s6, s10
+; RV64IM-NEXT: ld s11, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s11, s11, s10
+; RV64IM-NEXT: ld s10, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s9, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s10, s10, s9
+; RV64IM-NEXT: xor s7, s8, s7
+; RV64IM-NEXT: xor s2, s3, s2
; RV64IM-NEXT: xor t4, s0, t4
-; RV64IM-NEXT: ld t5, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, s3, t5
-; RV64IM-NEXT: ld s0, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, s1, t6
+; RV64IM-NEXT: xor t2, t2, t3
+; RV64IM-NEXT: ld t3, 824(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, s5, t3
+; RV64IM-NEXT: ld s0, 808(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, ra, s0
+; RV64IM-NEXT: ld s1, 696(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, s1
+; RV64IM-NEXT: ld s1, 664(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, s1
+; RV64IM-NEXT: ld s1, 600(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, s1
+; RV64IM-NEXT: ld s1, 480(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, s1
+; RV64IM-NEXT: ld s1, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, s1
+; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, s1
+; RV64IM-NEXT: ld s1, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, s1
+; RV64IM-NEXT: xor a0, t5, a0
+; RV64IM-NEXT: ld t5, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, t5
+; RV64IM-NEXT: ld t5, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, t5
+; RV64IM-NEXT: ld t5, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, s6, t5
+; RV64IM-NEXT: ld s1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, s11, s1
+; RV64IM-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s3, s10, s3
+; RV64IM-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s5, s7, s5
+; RV64IM-NEXT: ld s6, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s2, s2, s6
+; RV64IM-NEXT: ld s6, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t4, s6
+; RV64IM-NEXT: xor t6, t6, s4
+; RV64IM-NEXT: xor t2, t2, t3
+; RV64IM-NEXT: ld t3, 832(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, s0, t3
+; RV64IM-NEXT: ld s0, 720(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, s0
+; RV64IM-NEXT: ld s0, 712(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, s0
+; RV64IM-NEXT: ld s0, 672(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, s0
+; RV64IM-NEXT: ld s0, 512(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, s0
+; RV64IM-NEXT: ld s0, 504(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, s0
+; RV64IM-NEXT: ld s0, 488(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, s0
+; RV64IM-NEXT: ld s0, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, s0
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t5, a2
+; RV64IM-NEXT: ld t5, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, s1, t5
+; RV64IM-NEXT: ld s0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s3, s0
+; RV64IM-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, s5, s1
+; RV64IM-NEXT: ld s3, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s2, s2, s3
+; RV64IM-NEXT: ld s3, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t4, s3
+; RV64IM-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, t6, s3
+; RV64IM-NEXT: xor t2, t2, t3
+; RV64IM-NEXT: ld t3, 752(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, t3
+; RV64IM-NEXT: ld t3, 736(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, t3
+; RV64IM-NEXT: ld t3, 704(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, t3
+; RV64IM-NEXT: ld t3, 560(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t3
+; RV64IM-NEXT: ld t3, 536(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, t3
+; RV64IM-NEXT: ld t3, 520(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, t3
+; RV64IM-NEXT: ld t3, 496(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, t3
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 912(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 888(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t5, a2
+; RV64IM-NEXT: ld t3, 880(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, s0, t3
+; RV64IM-NEXT: ld t5, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, s1, t5
+; RV64IM-NEXT: ld s0, 224(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor s0, s2, s0
-; RV64IM-NEXT: ld s1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t4, s1
+; RV64IM-NEXT: ld s1, 200(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t6, t6, s1
-; RV64IM-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t3, t3, s1
-; RV64IM-NEXT: ld s1, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, s1
-; RV64IM-NEXT: ld s1, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, s1
-; RV64IM-NEXT: xor a0, t0, a0
-; RV64IM-NEXT: ld t0, 672(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, a1, t0
-; RV64IM-NEXT: ld t0, 656(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, t0
-; RV64IM-NEXT: ld t0, 488(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, t0
-; RV64IM-NEXT: ld t0, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, t2, a7
+; RV64IM-NEXT: ld t2, 760(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, t2
+; RV64IM-NEXT: ld t2, 728(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, t2
+; RV64IM-NEXT: ld t2, 592(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t2
+; RV64IM-NEXT: ld t2, 568(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, t2
+; RV64IM-NEXT: ld t2, 552(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, t2
+; RV64IM-NEXT: ld t2, 528(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, t2
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 968(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 936(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t3, a2
+; RV64IM-NEXT: ld t2, 896(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t2, t5, t2
+; RV64IM-NEXT: ld t3, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, s0, t3
+; RV64IM-NEXT: ld t5, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t4, t5
+; RV64IM-NEXT: ld t5, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t6, t5
+; RV64IM-NEXT: xor a7, a7, t0
+; RV64IM-NEXT: ld t0, 744(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t0, t1, t0
-; RV64IM-NEXT: ld t1, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t1, 640(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t1
+; RV64IM-NEXT: ld t1, 616(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a4, a4, t1
-; RV64IM-NEXT: ld t1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t1, 576(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a5, a5, t1
-; RV64IM-NEXT: xor a6, a6, t4
-; RV64IM-NEXT: ld t1, 792(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t5, t1
-; RV64IM-NEXT: ld t4, 776(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, s0, t4
-; RV64IM-NEXT: ld t5, 752(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t6, t5
-; RV64IM-NEXT: ld t6, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t3, t3, t6
-; RV64IM-NEXT: ld t6, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t6
-; RV64IM-NEXT: ld t6, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, t6
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 568(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 992(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: ld a2, 536(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: ld a3, 504(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, t0, a3
-; RV64IM-NEXT: ld t0, 480(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 960(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t2, a2
+; RV64IM-NEXT: ld t1, 920(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t3, t1
+; RV64IM-NEXT: ld t2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t2, t4, t2
+; RV64IM-NEXT: xor a7, a7, t0
+; RV64IM-NEXT: ld t0, 688(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t0
+; RV64IM-NEXT: ld t0, 648(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a4, a4, t0
-; RV64IM-NEXT: ld t0, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t0, 608(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a5, a5, t0
-; RV64IM-NEXT: xor a6, a6, t1
-; RV64IM-NEXT: ld t0, 824(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t4, t0
-; RV64IM-NEXT: ld t1, 800(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t5, t1
-; RV64IM-NEXT: ld t4, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t3, t3, t4
-; RV64IM-NEXT: ld t4, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t4
-; RV64IM-NEXT: ld t4, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, t4
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 584(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 984(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: ld a2, 552(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: ld a3, 520(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a4, a3
-; RV64IM-NEXT: ld a4, 496(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a5, a4
-; RV64IM-NEXT: xor a5, a6, t0
-; RV64IM-NEXT: ld a6, 840(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, t1, a6
-; RV64IM-NEXT: ld t0, 816(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t3, t0
-; RV64IM-NEXT: ld t1, 784(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t1
-; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t2, t1
+; RV64IM-NEXT: ld a2, 952(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t1, a2
+; RV64IM-NEXT: ld t0, 904(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t2, t0
+; RV64IM-NEXT: xor a3, a7, a3
+; RV64IM-NEXT: ld a7, 680(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, a7
+; RV64IM-NEXT: ld a7, 632(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, a7
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 608(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 976(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: ld a2, 560(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: ld a3, 512(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a4, a3
-; RV64IM-NEXT: xor a4, a5, a6
-; RV64IM-NEXT: ld a5, 848(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, t0, a5
-; RV64IM-NEXT: ld a6, 808(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a7, a6
-; RV64IM-NEXT: ld a7, 768(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, t1, a7
+; RV64IM-NEXT: ld a2, 928(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t0, a2
+; RV64IM-NEXT: xor a3, a3, a4
+; RV64IM-NEXT: ld a4, 656(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a5, a4
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 600(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 944(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: xor a4, a4, a5
-; RV64IM-NEXT: ld a2, 832(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a6, a2
+; RV64IM-NEXT: xor a3, a3, a4
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: xor a2, a4, a2
-; RV64IM-NEXT: xor a0, a0, a3
-; RV64IM-NEXT: xor a1, a2, a7
-; RV64IM-NEXT: ld a2, 736(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: sd a0, 0(a2)
-; RV64IM-NEXT: sd a1, 8(a2)
-; RV64IM-NEXT: ld a2, 744(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: sd a0, 0(a2)
-; RV64IM-NEXT: sd a1, 8(a2)
-; RV64IM-NEXT: ld ra, 952(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s0, 944(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s1, 936(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s2, 928(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s3, 920(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s4, 912(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s5, 904(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s6, 896(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s7, 888(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s8, 880(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s9, 872(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s10, 864(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s11, 856(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: addi sp, sp, 960
+; RV64IM-NEXT: xor a1, a3, a6
+; RV64IM-NEXT: xor a0, a0, t5
+; RV64IM-NEXT: ld a2, 864(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 0(a2)
+; RV64IM-NEXT: sd a0, 8(a2)
+; RV64IM-NEXT: ld a2, 872(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 0(a2)
+; RV64IM-NEXT: sd a0, 8(a2)
+; RV64IM-NEXT: ld ra, 1096(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s0, 1088(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 1080(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s2, 1072(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s3, 1064(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s4, 1056(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s5, 1048(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s6, 1040(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s7, 1032(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s8, 1024(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s9, 1016(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s10, 1008(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 1000(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 1104
; RV64IM-NEXT: ret
;
; RV32IMZBS-LABEL: commutative_clmul_v2i64:
@@ -12333,1858 +13739,1944 @@ define void @commutative_clmul_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %p
; RV32IMZBS-NEXT: sw s9, 740(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: sw s10, 736(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: sw s11, 732(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw a3, 688(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw a2, 684(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw ra, 0(a1)
-; RV32IMZBS-NEXT: lw a5, 4(a1)
-; RV32IMZBS-NEXT: lw a2, 8(a1)
-; RV32IMZBS-NEXT: sw a2, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 12(a1)
-; RV32IMZBS-NEXT: sw a1, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 0(a0)
-; RV32IMZBS-NEXT: lw a2, 4(a0)
-; RV32IMZBS-NEXT: lw s10, 8(a0)
-; RV32IMZBS-NEXT: lw a0, 12(a0)
-; RV32IMZBS-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a3, 728(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a2, 724(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a4, 0(a1)
+; RV32IMZBS-NEXT: lw a6, 4(a1)
+; RV32IMZBS-NEXT: lw s4, 8(a1)
+; RV32IMZBS-NEXT: lw s10, 12(a1)
+; RV32IMZBS-NEXT: lw t0, 0(a0)
+; RV32IMZBS-NEXT: lw t1, 4(a0)
+; RV32IMZBS-NEXT: lw t2, 8(a0)
+; RV32IMZBS-NEXT: lw s2, 12(a0)
; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: bseti a7, zero, 11
-; RV32IMZBS-NEXT: lui s6, 1
-; RV32IMZBS-NEXT: addi t0, a0, -256
-; RV32IMZBS-NEXT: sw t0, 728(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli a0, a1, 8
-; RV32IMZBS-NEXT: srli a6, a1, 24
-; RV32IMZBS-NEXT: and a0, a0, t0
-; RV32IMZBS-NEXT: or a0, a0, a6
-; RV32IMZBS-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 2
-; RV32IMZBS-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 1
+; RV32IMZBS-NEXT: addi s8, a0, -256
+; RV32IMZBS-NEXT: srli a7, t0, 8
+; RV32IMZBS-NEXT: srli t3, t0, 24
+; RV32IMZBS-NEXT: and a3, t0, s8
+; RV32IMZBS-NEXT: slli s9, t0, 24
+; RV32IMZBS-NEXT: srli t6, a4, 8
+; RV32IMZBS-NEXT: srli s1, a4, 24
+; RV32IMZBS-NEXT: and t4, a4, s8
+; RV32IMZBS-NEXT: slli t5, a4, 24
+; RV32IMZBS-NEXT: slli a5, t1, 1
+; RV32IMZBS-NEXT: andi s5, a4, 2
+; RV32IMZBS-NEXT: slli s0, t1, 2
+; RV32IMZBS-NEXT: andi s3, a4, 4
+; RV32IMZBS-NEXT: slli a1, t1, 3
+; RV32IMZBS-NEXT: andi a0, a4, 8
+; RV32IMZBS-NEXT: andi a2, a4, 16
+; RV32IMZBS-NEXT: slli s6, t0, 1
+; RV32IMZBS-NEXT: srli s7, t2, 8
+; RV32IMZBS-NEXT: and a7, a7, s8
+; RV32IMZBS-NEXT: or a7, a7, t3
+; RV32IMZBS-NEXT: srli t3, t2, 24
+; RV32IMZBS-NEXT: and t6, t6, s8
+; RV32IMZBS-NEXT: or t6, t6, s1
+; RV32IMZBS-NEXT: srli s1, s4, 8
+; RV32IMZBS-NEXT: slli t4, t4, 8
+; RV32IMZBS-NEXT: or t4, t5, t4
+; RV32IMZBS-NEXT: srli t5, s4, 24
+; RV32IMZBS-NEXT: and s7, s7, s8
+; RV32IMZBS-NEXT: or t3, s7, t3
+; RV32IMZBS-NEXT: and s7, s4, s8
+; RV32IMZBS-NEXT: and s1, s1, s8
+; RV32IMZBS-NEXT: sw s8, 720(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or t5, s1, t5
+; RV32IMZBS-NEXT: slli s1, s4, 24
+; RV32IMZBS-NEXT: slli s7, s7, 8
+; RV32IMZBS-NEXT: or s1, s1, s7
+; RV32IMZBS-NEXT: andi s7, a6, 2
+; RV32IMZBS-NEXT: slli a3, a3, 8
+; RV32IMZBS-NEXT: or a3, s9, a3
+; RV32IMZBS-NEXT: or a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a3, t2, s8
+; RV32IMZBS-NEXT: or a7, t4, t6
+; RV32IMZBS-NEXT: sw a7, 708(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t6, t2, 24
+; RV32IMZBS-NEXT: slli a3, a3, 8
+; RV32IMZBS-NEXT: or a3, t6, a3
+; RV32IMZBS-NEXT: or a3, a3, t3
+; RV32IMZBS-NEXT: sw a3, 712(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, t0, 2
+; RV32IMZBS-NEXT: or a7, s1, t5
+; RV32IMZBS-NEXT: sw a7, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a7, a6, 4
+; RV32IMZBS-NEXT: seqz t3, s5
+; RV32IMZBS-NEXT: seqz t4, s7
+; RV32IMZBS-NEXT: addi t3, t3, -1
+; RV32IMZBS-NEXT: addi t4, t4, -1
+; RV32IMZBS-NEXT: and t5, t3, a5
+; RV32IMZBS-NEXT: and a5, t4, s6
+; RV32IMZBS-NEXT: and s5, t3, s6
+; RV32IMZBS-NEXT: slli t3, t0, 3
+; RV32IMZBS-NEXT: seqz t4, s3
+; RV32IMZBS-NEXT: seqz a7, a7
+; RV32IMZBS-NEXT: addi t4, t4, -1
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: and s3, t4, s0
+; RV32IMZBS-NEXT: and s1, a7, a3
+; RV32IMZBS-NEXT: and t4, t4, a3
+; RV32IMZBS-NEXT: andi a3, a6, 8
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: seqz a3, a3
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and s8, a0, a1
+; RV32IMZBS-NEXT: and s6, a3, t3
+; RV32IMZBS-NEXT: and t3, a0, t3
+; RV32IMZBS-NEXT: andi a0, a6, 16
+; RV32IMZBS-NEXT: seqz a1, a2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a2, t1, 4
+; RV32IMZBS-NEXT: and a2, a1, a2
+; RV32IMZBS-NEXT: sw a2, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t0, 4
+; RV32IMZBS-NEXT: and s11, a0, a2
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, a4, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a6, 32
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t1, 5
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t0, 5
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 4
-; RV32IMZBS-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 8
-; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 16
-; RV32IMZBS-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 32
-; RV32IMZBS-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 64
-; RV32IMZBS-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 128
-; RV32IMZBS-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 256
+; RV32IMZBS-NEXT: andi a0, a4, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a6, 64
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t1, 6
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t0, 6
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, a4, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a6, 128
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t1, 7
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t0, 7
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 596(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, a4, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a6, 256
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t1, 8
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 564(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t0, 8
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 572(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 512
-; RV32IMZBS-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 1024
-; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw a7, 724(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, a5, a7
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, a5, s6
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s4, 2
-; RV32IMZBS-NEXT: and a0, a5, s4
-; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s3, 4
-; RV32IMZBS-NEXT: and a0, a5, s3
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s2, 8
-; RV32IMZBS-NEXT: and a0, a5, s2
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t6, 16
-; RV32IMZBS-NEXT: and a0, a5, t6
-; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s1, 32
-; RV32IMZBS-NEXT: and a0, a5, s1
-; RV32IMZBS-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t5, 64
-; RV32IMZBS-NEXT: and a0, a5, t5
-; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t4, 128
-; RV32IMZBS-NEXT: and a0, a5, t4
-; RV32IMZBS-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t3, 256
-; RV32IMZBS-NEXT: and a0, a5, t3
+; RV32IMZBS-NEXT: andi a0, a4, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a6, 512
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t1, 9
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t0, 9
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, a4, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a6, 1024
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t1, 10
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 688(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t0, 10
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a0, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 11
+; RV32IMZBS-NEXT: addi a2, a1, -1
+; RV32IMZBS-NEXT: not a1, a6
+; RV32IMZBS-NEXT: bexti a3, a1, 11
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 11
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 504(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 11
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 12
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 12
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 12
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 488(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 12
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 500(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 532(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 13
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 13
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 13
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 13
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 560(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 588(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 14
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 14
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 14
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 14
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 656(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 15
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 15
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 664(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 15
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 668(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 684(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 16
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 16
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 16
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 452(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 16
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 456(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 472(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 17
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 17
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 17
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 440(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 17
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 448(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 468(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 18
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 18
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 18
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 476(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 18
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 480(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 508(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 19
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 19
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 19
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 19
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 544(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 576(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 20
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 20
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 20
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 584(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 20
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 592(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 21
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 21
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 21
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 21
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 612(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 22
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 22
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 22
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 416(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 22
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 420(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 428(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 23
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 23
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 23
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 408(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 23
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 412(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 424(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 24
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 24
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 24
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 432(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a3, a3, s9
+; RV32IMZBS-NEXT: sw a3, 436(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, s9
+; RV32IMZBS-NEXT: sw a2, 444(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 25
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 25
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 25
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 460(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 25
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 464(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 484(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 26
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 26
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 26
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 492(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 26
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 496(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 512(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 27
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 27
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 27
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 27
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 520(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 528(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 28
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 28
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 28
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 28
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 568(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 29
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 29
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a7, t1, 29
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: sw a7, 384(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, t0, 29
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: sw a3, 388(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a7, a2, a7
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: bexti a1, a1, 30
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t1, 30
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 376(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t0, 30
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 380(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t1, 512
-; RV32IMZBS-NEXT: and a0, a5, t1
-; RV32IMZBS-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a6, 1024
-; RV32IMZBS-NEXT: and a0, a5, a6
-; RV32IMZBS-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 2048
-; RV32IMZBS-NEXT: and a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 4096
-; RV32IMZBS-NEXT: and a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 8192
-; RV32IMZBS-NEXT: and a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s0, 16384
-; RV32IMZBS-NEXT: and a0, a5, s0
+; RV32IMZBS-NEXT: andi a0, a4, 1
+; RV32IMZBS-NEXT: srli a1, a4, 31
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a2, a6, 1
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a3, t1, 31
+; RV32IMZBS-NEXT: and a4, a0, t1
+; RV32IMZBS-NEXT: sw a4, 356(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t0
+; RV32IMZBS-NEXT: sw a2, 368(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and t1, a0, t0
+; RV32IMZBS-NEXT: slli t0, t0, 31
+; RV32IMZBS-NEXT: srli a0, a6, 31
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: and a3, a1, a3
+; RV32IMZBS-NEXT: sw a3, 392(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, t0
+; RV32IMZBS-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a1, t0
+; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s10, 2
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 1
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 344(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t2, 1
+; RV32IMZBS-NEXT: and t0, a1, a2
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t2, 32768
-; RV32IMZBS-NEXT: and a0, a5, t2
-; RV32IMZBS-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 65536
-; RV32IMZBS-NEXT: and a0, a5, t0
-; RV32IMZBS-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s11, 131072
-; RV32IMZBS-NEXT: and a0, a5, s11
+; RV32IMZBS-NEXT: andi a0, s4, 4
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s10, 4
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 2
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 324(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t2, 2
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 332(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a4, 262144
-; RV32IMZBS-NEXT: and a0, a5, a4
+; RV32IMZBS-NEXT: andi a0, s4, 8
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s10, 8
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 3
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 300(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t2, 3
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 16
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s10, 16
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 4
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 276(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t2, 4
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 288(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 316(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s10, 32
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 5
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 256(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t2, 5
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 292(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s10, 64
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 6
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 320(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t2, 6
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 328(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s10, 128
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 7
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 220(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t2, 7
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s10, 256
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 8
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 196(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t2, 8
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 204(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s10, 512
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 9
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 252(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t2, 9
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s10, 1024
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 10
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 340(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t2, 10
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 348(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a1, s4
+; RV32IMZBS-NEXT: bexti a0, a1, 11
+; RV32IMZBS-NEXT: addi a2, a0, -1
+; RV32IMZBS-NEXT: not a0, s10
+; RV32IMZBS-NEXT: bexti a3, a0, 11
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 11
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 152(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 11
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 156(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 164(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 12
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 12
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 12
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 124(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 12
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 136(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 160(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 13
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 13
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 13
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 184(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 13
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 192(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 208(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 14
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 14
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 14
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 264(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 14
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 272(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 280(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 15
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 15
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 296(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 15
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 304(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 312(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 16
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 16
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 16
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 80(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 16
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 84(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 96(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 17
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 17
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 17
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 72(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 17
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 76(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 88(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 18
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 18
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 18
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 104(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 18
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 112(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 132(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 19
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 19
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 19
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 180(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 19
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 188(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 200(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 20
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 20
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 20
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 212(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 20
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 216(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 224(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 21
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 21
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 21
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 232(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 21
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 236(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 244(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 22
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 22
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 22
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 44(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 22
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 48(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 56(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 23
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 23
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 23
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 36(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 23
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 40(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 52(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 24
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 24
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 24
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 60(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a3, a3, t6
+; RV32IMZBS-NEXT: sw a3, 64(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t6
+; RV32IMZBS-NEXT: sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 25
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 25
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 25
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 92(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 25
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 100(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 108(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 26
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 26
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 26
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 116(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 26
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 120(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 128(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 27
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 27
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 27
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 140(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 27
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 144(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 148(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 28
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 28
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a6, s2, 28
+; RV32IMZBS-NEXT: and a4, a2, a6
+; RV32IMZBS-NEXT: sw a4, 172(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, t2, 28
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: sw a3, 168(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 176(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 29
+; RV32IMZBS-NEXT: addi a6, a2, -1
+; RV32IMZBS-NEXT: bexti a2, a0, 29
+; RV32IMZBS-NEXT: addi a4, a2, -1
+; RV32IMZBS-NEXT: slli a2, s2, 29
+; RV32IMZBS-NEXT: and ra, a6, a2
+; RV32IMZBS-NEXT: slli a3, t2, 29
+; RV32IMZBS-NEXT: and a4, a4, a3
+; RV32IMZBS-NEXT: and a2, a6, a3
+; RV32IMZBS-NEXT: sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a1, 30
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi a2, a0, -1
+; RV32IMZBS-NEXT: slli a3, s2, 30
+; RV32IMZBS-NEXT: and s7, a1, a3
+; RV32IMZBS-NEXT: slli a0, t2, 30
+; RV32IMZBS-NEXT: and a2, a2, a0
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a1, s4, 1
+; RV32IMZBS-NEXT: srli s4, s4, 31
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: andi s9, s10, 1
+; RV32IMZBS-NEXT: seqz s9, s9
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi s9, s9, -1
+; RV32IMZBS-NEXT: slli s0, s2, 31
+; RV32IMZBS-NEXT: and s2, a1, s2
+; RV32IMZBS-NEXT: and s9, s9, t2
+; RV32IMZBS-NEXT: and a3, a1, t2
+; RV32IMZBS-NEXT: slli t2, t2, 31
+; RV32IMZBS-NEXT: srli a1, s10, 31
+; RV32IMZBS-NEXT: seqz s4, s4
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a0, s4, s0
+; RV32IMZBS-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a1, t2
+; RV32IMZBS-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, s4, t2
+; RV32IMZBS-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, t5
; RV32IMZBS-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and a5, a5, a0
-; RV32IMZBS-NEXT: sw a5, 352(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 2
-; RV32IMZBS-NEXT: sw a3, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 1
-; RV32IMZBS-NEXT: sw a3, 692(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 4
-; RV32IMZBS-NEXT: sw a3, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 8
-; RV32IMZBS-NEXT: sw a3, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 16
-; RV32IMZBS-NEXT: sw a3, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 32
-; RV32IMZBS-NEXT: sw a3, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 64
-; RV32IMZBS-NEXT: sw a3, 344(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s9, ra, 128
-; RV32IMZBS-NEXT: sw s9, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 256
-; RV32IMZBS-NEXT: sw a3, 348(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s8, ra, 512
-; RV32IMZBS-NEXT: sw s8, 412(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s7, ra, 1024
-; RV32IMZBS-NEXT: sw s7, 416(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s5, ra, a7
-; RV32IMZBS-NEXT: sw s5, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s6, ra, s6
-; RV32IMZBS-NEXT: and s4, ra, s4
-; RV32IMZBS-NEXT: sw s4, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s3, ra, s3
-; RV32IMZBS-NEXT: sw s3, 428(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s2, ra, s2
-; RV32IMZBS-NEXT: sw s2, 432(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t6, ra, t6
-; RV32IMZBS-NEXT: sw t6, 436(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s1, ra, s1
-; RV32IMZBS-NEXT: and t5, ra, t5
-; RV32IMZBS-NEXT: sw t5, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t4, ra, t4
-; RV32IMZBS-NEXT: sw t4, 444(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t3, ra, t3
-; RV32IMZBS-NEXT: sw t3, 448(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t1, ra, t1
-; RV32IMZBS-NEXT: sw t1, 452(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a6, ra, a6
-; RV32IMZBS-NEXT: sw a6, 456(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 2048
-; RV32IMZBS-NEXT: and a7, ra, a3
-; RV32IMZBS-NEXT: lui a3, 4096
-; RV32IMZBS-NEXT: and a5, ra, a3
-; RV32IMZBS-NEXT: sw a5, 460(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 8192
-; RV32IMZBS-NEXT: and a3, ra, a3
-; RV32IMZBS-NEXT: sw a3, 464(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s0, ra, s0
-; RV32IMZBS-NEXT: and t2, ra, t2
-; RV32IMZBS-NEXT: and t0, ra, t0
-; RV32IMZBS-NEXT: and s11, ra, s11
-; RV32IMZBS-NEXT: sw ra, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, ra, a4
-; RV32IMZBS-NEXT: sw a4, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, ra, a0
-; RV32IMZBS-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw ra, 344(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, ra
-; RV32IMZBS-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s9
-; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s9, 348(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, s9
-; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s8
-; RV32IMZBS-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s7
-; RV32IMZBS-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s5
-; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s6
-; RV32IMZBS-NEXT: mv s5, s6
-; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s4
-; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s3
-; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s2
-; RV32IMZBS-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t6
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s1
-; RV32IMZBS-NEXT: mv t6, s1
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t5
-; RV32IMZBS-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t4
-; RV32IMZBS-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t3
-; RV32IMZBS-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t1
-; RV32IMZBS-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, a6
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, a7
-; RV32IMZBS-NEXT: mv a6, a7
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, a5
-; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, a3
-; RV32IMZBS-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s0
-; RV32IMZBS-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t2
-; RV32IMZBS-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t0
-; RV32IMZBS-NEXT: sw a0, 676(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a2, s11
-; RV32IMZBS-NEXT: mv t1, s11
-; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a5, a2, a0
-; RV32IMZBS-NEXT: mul a0, a2, a4
-; RV32IMZBS-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t3, a1, a0
-; RV32IMZBS-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t4, a1, a0
+; RV32IMZBS-NEXT: xor a0, s3, s8
+; RV32IMZBS-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s1, a1, a0
-; RV32IMZBS-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s3, a1, a0
-; RV32IMZBS-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s4, a1, a0
-; RV32IMZBS-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s6, a1, a0
-; RV32IMZBS-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s7, a1, a0
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s8, a1, a0
-; RV32IMZBS-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
+; RV32IMZBS-NEXT: lw a1, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a7, a1, a0
-; RV32IMZBS-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s11, a1, a0
-; RV32IMZBS-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 388(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 440(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 380(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 368(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
+; RV32IMZBS-NEXT: xor a0, a0, a5
; RV32IMZBS-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 364(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 360(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, ra
-; RV32IMZBS-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
+; RV32IMZBS-NEXT: xor a0, s1, s6
; RV32IMZBS-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s9
-; RV32IMZBS-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s5
-; RV32IMZBS-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t6
-; RV32IMZBS-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, s11, a0
+; RV32IMZBS-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a6
-; RV32IMZBS-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s0
-; RV32IMZBS-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t2
-; RV32IMZBS-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t0
-; RV32IMZBS-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t1
-; RV32IMZBS-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a4
-; RV32IMZBS-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 728(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, a1, a2
-; RV32IMZBS-NEXT: slli a1, a1, 24
-; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli a0, a4, 8
-; RV32IMZBS-NEXT: and a0, a0, a2
-; RV32IMZBS-NEXT: srli a1, a4, 24
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, a4, a2
-; RV32IMZBS-NEXT: slli a4, a4, 24
-; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a0, a4, a0
-; RV32IMZBS-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 448(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 388(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 344(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, s2, a0
+; RV32IMZBS-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 324(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 300(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s11, a0, a1
+; RV32IMZBS-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 256(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 220(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a3, a3, a5
-; RV32IMZBS-NEXT: sw a3, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t4, t3
-; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t5, s1
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, s2, s3
-; RV32IMZBS-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, s4, s6
-; RV32IMZBS-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, s7, s8
-; RV32IMZBS-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, a7, s11
-; RV32IMZBS-NEXT: sw a0, 324(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli a0, s10, 8
-; RV32IMZBS-NEXT: and a0, a0, a2
-; RV32IMZBS-NEXT: srli a1, s10, 24
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s6, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: andi a0, s6, 2
-; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 1
-; RV32IMZBS-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 4
-; RV32IMZBS-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 8
-; RV32IMZBS-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 16
-; RV32IMZBS-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 32
-; RV32IMZBS-NEXT: sw a0, 292(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 64
-; RV32IMZBS-NEXT: sw a0, 280(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 128
-; RV32IMZBS-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 256
-; RV32IMZBS-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 512
-; RV32IMZBS-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 1024
-; RV32IMZBS-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw t3, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, s6, t3
-; RV32IMZBS-NEXT: sw a0, 244(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 1
-; RV32IMZBS-NEXT: and a0, s6, a3
-; RV32IMZBS-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t1, 2
-; RV32IMZBS-NEXT: and a0, s6, t1
-; RV32IMZBS-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s0, 4
-; RV32IMZBS-NEXT: and a0, s6, s0
-; RV32IMZBS-NEXT: sw a0, 228(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t4, 8
-; RV32IMZBS-NEXT: and a0, s6, t4
-; RV32IMZBS-NEXT: sw a0, 192(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a5, 16
-; RV32IMZBS-NEXT: and a0, s6, a5
-; RV32IMZBS-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t5, 32
-; RV32IMZBS-NEXT: and a0, s6, t5
-; RV32IMZBS-NEXT: sw a0, 108(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s7, 64
-; RV32IMZBS-NEXT: and a0, s6, s7
-; RV32IMZBS-NEXT: sw a0, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s1, 128
-; RV32IMZBS-NEXT: and a0, s6, s1
-; RV32IMZBS-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s2, 256
-; RV32IMZBS-NEXT: and a0, s6, s2
-; RV32IMZBS-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 512
-; RV32IMZBS-NEXT: and a0, s6, a2
-; RV32IMZBS-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s4, 1024
-; RV32IMZBS-NEXT: and a0, s6, s4
-; RV32IMZBS-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s5, 2048
-; RV32IMZBS-NEXT: and a0, s6, s5
-; RV32IMZBS-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a7, 4096
-; RV32IMZBS-NEXT: and a0, s6, a7
-; RV32IMZBS-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 8192
-; RV32IMZBS-NEXT: and a0, s6, a1
-; RV32IMZBS-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 16384
-; RV32IMZBS-NEXT: and a0, s6, t0
-; RV32IMZBS-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a4, 32768
-; RV32IMZBS-NEXT: and a0, s6, a4
-; RV32IMZBS-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 65536
-; RV32IMZBS-NEXT: and a0, s6, a0
-; RV32IMZBS-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 131072
-; RV32IMZBS-NEXT: and a0, s6, a0
-; RV32IMZBS-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 262144
-; RV32IMZBS-NEXT: and a0, s6, a0
-; RV32IMZBS-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and a0, s6, a0
-; RV32IMZBS-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s11, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: andi a0, s11, 2
-; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 1
-; RV32IMZBS-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 4
-; RV32IMZBS-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 8
-; RV32IMZBS-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 16
-; RV32IMZBS-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 32
-; RV32IMZBS-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 64
-; RV32IMZBS-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s3, s11, 128
-; RV32IMZBS-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 256
-; RV32IMZBS-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi ra, s11, 512
-; RV32IMZBS-NEXT: andi a0, s11, 1024
-; RV32IMZBS-NEXT: and s8, s11, t3
-; RV32IMZBS-NEXT: and t2, s11, a3
-; RV32IMZBS-NEXT: sw t2, 20(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a6, s11, t1
-; RV32IMZBS-NEXT: sw a6, 24(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t6, s11, s0
-; RV32IMZBS-NEXT: and t3, s11, t4
-; RV32IMZBS-NEXT: and t1, s11, a5
-; RV32IMZBS-NEXT: and t4, s11, t5
-; RV32IMZBS-NEXT: and t5, s11, s7
-; RV32IMZBS-NEXT: and s0, s11, s1
-; RV32IMZBS-NEXT: and s1, s11, s2
-; RV32IMZBS-NEXT: and a5, s11, a2
-; RV32IMZBS-NEXT: sw a5, 28(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a3, s11, s4
-; RV32IMZBS-NEXT: sw a3, 32(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s2, s11, s5
-; RV32IMZBS-NEXT: and a2, s11, a7
-; RV32IMZBS-NEXT: and s9, s11, a1
-; RV32IMZBS-NEXT: and t0, s11, t0
-; RV32IMZBS-NEXT: and s7, s11, a4
-; RV32IMZBS-NEXT: lui a1, 65536
-; RV32IMZBS-NEXT: and a7, s11, a1
-; RV32IMZBS-NEXT: lui a1, 131072
-; RV32IMZBS-NEXT: and s6, s11, a1
-; RV32IMZBS-NEXT: lui a1, 262144
-; RV32IMZBS-NEXT: and s4, s11, a1
-; RV32IMZBS-NEXT: lui a1, 524288
-; RV32IMZBS-NEXT: and s5, s11, a1
-; RV32IMZBS-NEXT: lw a1, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 232(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 224(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s11, 8(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, s11
-; RV32IMZBS-NEXT: sw a4, 220(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 216(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 212(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 208(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 96(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s3
-; RV32IMZBS-NEXT: sw a4, 204(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, s3
-; RV32IMZBS-NEXT: sw a4, 200(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, ra
-; RV32IMZBS-NEXT: sw a4, 288(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a0
-; RV32IMZBS-NEXT: sw a4, 168(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s8
-; RV32IMZBS-NEXT: sw a4, 196(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t2
-; RV32IMZBS-NEXT: sw a4, 188(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a6
-; RV32IMZBS-NEXT: sw a4, 284(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t6
-; RV32IMZBS-NEXT: mv a6, t6
-; RV32IMZBS-NEXT: sw a4, 164(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t3
-; RV32IMZBS-NEXT: mv t2, t3
-; RV32IMZBS-NEXT: sw a4, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t1
-; RV32IMZBS-NEXT: mv t3, t1
-; RV32IMZBS-NEXT: sw a4, 180(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t4
-; RV32IMZBS-NEXT: mv t1, t4
-; RV32IMZBS-NEXT: sw a4, 176(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t5
-; RV32IMZBS-NEXT: mv t4, t5
-; RV32IMZBS-NEXT: sw a4, 276(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s0
-; RV32IMZBS-NEXT: mv t6, s0
-; RV32IMZBS-NEXT: sw a4, 160(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s1
-; RV32IMZBS-NEXT: mv s0, s1
-; RV32IMZBS-NEXT: sw a4, 320(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a5
-; RV32IMZBS-NEXT: sw a4, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a3
-; RV32IMZBS-NEXT: sw a4, 156(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s2
-; RV32IMZBS-NEXT: mv s1, s2
-; RV32IMZBS-NEXT: sw a4, 152(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a2
-; RV32IMZBS-NEXT: sw a4, 272(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s9
-; RV32IMZBS-NEXT: sw a4, 148(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t0
-; RV32IMZBS-NEXT: sw a4, 316(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s7
-; RV32IMZBS-NEXT: sw a4, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a7
-; RV32IMZBS-NEXT: sw a4, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s6
-; RV32IMZBS-NEXT: sw a4, 92(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a5, a1, s4
-; RV32IMZBS-NEXT: sw a5, 144(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mv a5, s4
-; RV32IMZBS-NEXT: mul a1, a1, s5
-; RV32IMZBS-NEXT: mv t5, s5
-; RV32IMZBS-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, a0, a1
+; RV32IMZBS-NEXT: xor a0, ra, s7
+; RV32IMZBS-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor ra, s9, t0
+; RV32IMZBS-NEXT: lw a0, 332(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw a1, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 300(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 296(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 292(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 280(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 256(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 172(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 256(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 296(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s2, s10, a1
-; RV32IMZBS-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s4, s10, a1
-; RV32IMZBS-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 292(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 304(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s9, a0, a1
+; RV32IMZBS-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 268(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, a0, a1
+; RV32IMZBS-NEXT: lw a0, 228(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 204(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, a0, s0
+; RV32IMZBS-NEXT: lw a0, 156(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 136(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, a0, s1
+; RV32IMZBS-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 492(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 280(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 300(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s2, a0, a1
+; RV32IMZBS-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s11, s10, s11
-; RV32IMZBS-NEXT: lw a1, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, s10, s3
-; RV32IMZBS-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, s10, ra
-; RV32IMZBS-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, s10, a0
-; RV32IMZBS-NEXT: sw a1, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s8, s10, s8
-; RV32IMZBS-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul ra, s10, a0
-; RV32IMZBS-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, s10, a0
-; RV32IMZBS-NEXT: mul a1, s10, a6
-; RV32IMZBS-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, s10, t2
-; RV32IMZBS-NEXT: sw a1, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t2, s10, t3
-; RV32IMZBS-NEXT: mul t3, s10, t1
-; RV32IMZBS-NEXT: mul t1, s10, t4
-; RV32IMZBS-NEXT: mul a1, s10, t6
-; RV32IMZBS-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, s10, s0
-; RV32IMZBS-NEXT: sw a1, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a0
-; RV32IMZBS-NEXT: sw a1, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s3, a0, a1
+; RV32IMZBS-NEXT: xor s4, a4, a2
+; RV32IMZBS-NEXT: xor s5, t1, s5
+; RV32IMZBS-NEXT: xor t0, t4, t3
+; RV32IMZBS-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a0, a1
+; RV32IMZBS-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 604(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, a0, a1
+; RV32IMZBS-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a0, a1
+; RV32IMZBS-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, a0, a1
+; RV32IMZBS-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, a0, a1
+; RV32IMZBS-NEXT: lw a0, 396(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, a7, a0
+; RV32IMZBS-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a3, a0
+; RV32IMZBS-NEXT: lw a0, 360(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 336(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 316(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 292(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: lw a2, 248(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 240(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a3
+; RV32IMZBS-NEXT: lw a3, 164(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 160(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a4
+; RV32IMZBS-NEXT: lw a4, 96(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 88(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a5, 56(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a6, 52(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, a6
+; RV32IMZBS-NEXT: lw a6, 24(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a6, s7
+; RV32IMZBS-NEXT: lw s7, 356(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s7, s8
+; RV32IMZBS-NEXT: sw s7, 532(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 672(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 504(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 500(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 564(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 488(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 476(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 452(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 476(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 432(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 440(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 472(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 392(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 416(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 408(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s7, s8
+; RV32IMZBS-NEXT: sw s7, 452(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 680(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 384(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 448(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 596(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 440(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 560(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 432(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 480(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 428(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 436(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 456(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 436(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 400(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 420(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s7, s11
+; RV32IMZBS-NEXT: sw s7, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 320(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 412(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 252(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 388(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 184(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 380(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 104(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 376(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s8, s7
+; RV32IMZBS-NEXT: lw s7, 60(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, s10, s7
+; RV32IMZBS-NEXT: lw s7, 20(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s11, 368(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s11, s7
+; RV32IMZBS-NEXT: sw s7, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s9, ra, s9
+; RV32IMZBS-NEXT: lw s7, 328(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, s6, s7
+; RV32IMZBS-NEXT: sw s6, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s6, 260(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s0, s6
+; RV32IMZBS-NEXT: sw s0, 604(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s0, 192(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s1, s0
+; RV32IMZBS-NEXT: lw s0, 112(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s2, s0
+; RV32IMZBS-NEXT: sw s0, 596(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s0, 64(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s3, s0
+; RV32IMZBS-NEXT: sw s0, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s4, s0
+; RV32IMZBS-NEXT: sw s0, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor t0, s5, t0
+; RV32IMZBS-NEXT: sw t0, 572(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 696(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t1, t0
+; RV32IMZBS-NEXT: sw t0, 564(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t2, t0
+; RV32IMZBS-NEXT: sw t0, 560(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t3, t0
+; RV32IMZBS-NEXT: sw t0, 588(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 508(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t4, t0
+; RV32IMZBS-NEXT: sw t0, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 444(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t5, t0
+; RV32IMZBS-NEXT: sw t0, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 404(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t6, t0
+; RV32IMZBS-NEXT: sw t0, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a0, a7, a0
+; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a0
+; RV32IMZBS-NEXT: lw a0, 208(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a3, a0
+; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a4, a0
+; RV32IMZBS-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a5, a0
+; RV32IMZBS-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t4, s10, a0
-; RV32IMZBS-NEXT: mul t6, s10, s1
-; RV32IMZBS-NEXT: mul a6, s10, a2
-; RV32IMZBS-NEXT: mul a0, s10, s9
-; RV32IMZBS-NEXT: sw a0, 192(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, s10, t0
-; RV32IMZBS-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, s10, s7
+; RV32IMZBS-NEXT: xor a0, a6, a0
+; RV32IMZBS-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lui a4, 61681
+; RV32IMZBS-NEXT: addi a4, a4, -241
+; RV32IMZBS-NEXT: sw a4, 696(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a3, a0, 4
+; RV32IMZBS-NEXT: and t1, a0, a4
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: slli t1, t1, 4
+; RV32IMZBS-NEXT: or a0, a3, t1
+; RV32IMZBS-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t1, a0, 4
+; RV32IMZBS-NEXT: and t2, a0, a4
+; RV32IMZBS-NEXT: and t1, t1, a4
+; RV32IMZBS-NEXT: slli t2, t2, 4
+; RV32IMZBS-NEXT: or a0, t1, t2
; RV32IMZBS-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, s10, a7
+; RV32IMZBS-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, a1, a0
+; RV32IMZBS-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 448(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 440(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s11, a1, a0
+; RV32IMZBS-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 432(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t2, a0, 4
+; RV32IMZBS-NEXT: and t3, a0, a4
+; RV32IMZBS-NEXT: and t2, t2, a4
+; RV32IMZBS-NEXT: slli t3, t3, 4
+; RV32IMZBS-NEXT: or a0, t2, t3
; RV32IMZBS-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, s10, s6
-; RV32IMZBS-NEXT: mul s5, s10, a5
-; RV32IMZBS-NEXT: mul a4, s10, t5
-; RV32IMZBS-NEXT: lw a1, 728(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and s3, s10, a1
-; RV32IMZBS-NEXT: slli s10, s10, 24
-; RV32IMZBS-NEXT: slli s3, s3, 8
-; RV32IMZBS-NEXT: or a5, s10, s3
-; RV32IMZBS-NEXT: lw s9, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s6, s9, 8
-; RV32IMZBS-NEXT: and s6, s6, a1
-; RV32IMZBS-NEXT: srli s7, s9, 24
-; RV32IMZBS-NEXT: or a7, s6, s7
-; RV32IMZBS-NEXT: and s7, s9, a1
-; RV32IMZBS-NEXT: slli s9, s9, 24
-; RV32IMZBS-NEXT: slli s7, s7, 8
-; RV32IMZBS-NEXT: or t0, s9, s7
-; RV32IMZBS-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 224(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, a2, a1
-; RV32IMZBS-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 216(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, a1, a2
-; RV32IMZBS-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 208(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, a1, a2
-; RV32IMZBS-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 200(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, a1, a2
-; RV32IMZBS-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 188(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, a1, a2
-; RV32IMZBS-NEXT: lw a1, 180(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 176(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s9, a1, a2
-; RV32IMZBS-NEXT: lw a1, 156(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 152(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, a1, a2
-; RV32IMZBS-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 92(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 140(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 136(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, a2, a1
-; RV32IMZBS-NEXT: lw a1, 132(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 128(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 120(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 112(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 100(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor s4, s2, s4
-; RV32IMZBS-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 84(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 204(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, a2, a1
-; RV32IMZBS-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 392(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 380(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 372(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 364(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 356(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 380(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 348(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 340(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 376(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: sw a1, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, s11, a1
-; RV32IMZBS-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 372(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a1, a2
-; RV32IMZBS-NEXT: xor s8, s8, ra
-; RV32IMZBS-NEXT: xor t2, t2, t3
-; RV32IMZBS-NEXT: xor t3, t4, t6
-; RV32IMZBS-NEXT: xor t4, a0, s5
-; RV32IMZBS-NEXT: lw a0, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 352(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 344(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, a0, a1
-; RV32IMZBS-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t3, a0, 4
+; RV32IMZBS-NEXT: and a0, a0, a4
+; RV32IMZBS-NEXT: and t3, t3, a4
+; RV32IMZBS-NEXT: slli a0, a0, 4
+; RV32IMZBS-NEXT: or a0, t3, a0
+; RV32IMZBS-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 336(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 328(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 324(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: or a0, t0, a7
-; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a5, t5, s1
-; RV32IMZBS-NEXT: lw a0, 96(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, s3, a0
-; RV32IMZBS-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, s6, a0
-; RV32IMZBS-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, s7, a0
-; RV32IMZBS-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, s9, a0
-; RV32IMZBS-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s10, a0
-; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 224(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s0, a0
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s4, a0
-; RV32IMZBS-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 228(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s2, a0
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 312(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, s11
-; RV32IMZBS-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, a1, a0
-; RV32IMZBS-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, a2, a0
-; RV32IMZBS-NEXT: xor ra, s8, a3
-; RV32IMZBS-NEXT: xor a0, t2, t1
-; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t3, a6
-; RV32IMZBS-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t4, a4
-; RV32IMZBS-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t6, s5
-; RV32IMZBS-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, a1, a0
-; RV32IMZBS-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s9, a0, a1
-; RV32IMZBS-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 636(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor s5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s1, 564(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, a1, a0
+; RV32IMZBS-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s8, a0
+; RV32IMZBS-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s10, a0
+; RV32IMZBS-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, s9, a0
+; RV32IMZBS-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 604(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s0, a0
+; RV32IMZBS-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor s1, s1, a0
-; RV32IMZBS-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, a1, a0
-; RV32IMZBS-NEXT: xor s4, a5, a7
-; RV32IMZBS-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, t0, a0
-; RV32IMZBS-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t5, a0
-; RV32IMZBS-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, s6, a0
-; RV32IMZBS-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 596(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, a1, a0
+; RV32IMZBS-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t6, a0, a1
-; RV32IMZBS-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a1, a0
-; RV32IMZBS-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a1, a0
+; RV32IMZBS-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a7, a1, a0
-; RV32IMZBS-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t0, a1, a0
-; RV32IMZBS-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t1, a0, a1
-; RV32IMZBS-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 412(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a0
-; RV32IMZBS-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a5, 424(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 364(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a2, a0
+; RV32IMZBS-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 480(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a5, a5, a0
-; RV32IMZBS-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 440(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a0
-; RV32IMZBS-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a3, 436(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 200(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a1, a0
+; RV32IMZBS-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 456(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a3, a3, a0
-; RV32IMZBS-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, a0, s10
-; RV32IMZBS-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, s11, a0
-; RV32IMZBS-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor ra, ra, a0
-; RV32IMZBS-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, a0, ra
+; RV32IMZBS-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, s6, a1
-; RV32IMZBS-NEXT: lw s6, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: lw a2, 492(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, s8, a2
+; RV32IMZBS-NEXT: lw s8, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s11, s8, s11
+; RV32IMZBS-NEXT: lw s8, 668(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: lw s9, 592(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s10, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s9, s10, s9
+; RV32IMZBS-NEXT: lw s10, 496(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 544(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, s7, s10
+; RV32IMZBS-NEXT: lw s7, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s5, s7, s5
+; RV32IMZBS-NEXT: lw s7, 296(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor s6, s6, s7
-; RV32IMZBS-NEXT: sw s6, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s6, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, s7, s6
-; RV32IMZBS-NEXT: sw s6, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s6, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 636(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, s7, s6
-; RV32IMZBS-NEXT: lw s6, 644(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, s8, s6
-; RV32IMZBS-NEXT: xor s5, s9, s5
-; RV32IMZBS-NEXT: lw s6, 632(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s1, s6
-; RV32IMZBS-NEXT: lw s6, 624(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, s2, s6
-; RV32IMZBS-NEXT: lw s6, 616(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, s3, s6
+; RV32IMZBS-NEXT: lw s7, 212(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s2, s7
+; RV32IMZBS-NEXT: lw s7, 116(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, s7
; RV32IMZBS-NEXT: xor s0, s4, s0
-; RV32IMZBS-NEXT: sw s0, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s0, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t3, s0
-; RV32IMZBS-NEXT: lw s0, 320(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, t4, s0
-; RV32IMZBS-NEXT: lw s0, 316(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, t5, s0
-; RV32IMZBS-NEXT: xor s0, t6, t2
-; RV32IMZBS-NEXT: lw t2, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, t2
-; RV32IMZBS-NEXT: lw t2, 304(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, t2
-; RV32IMZBS-NEXT: lw t2, 300(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, t2
+; RV32IMZBS-NEXT: lw s4, 304(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s1, s4
+; RV32IMZBS-NEXT: lw s4, 216(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, s4
+; RV32IMZBS-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, s4
+; RV32IMZBS-NEXT: xor t2, t6, t2
+; RV32IMZBS-NEXT: lw t6, 684(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, t3, t6
+; RV32IMZBS-NEXT: lw t6, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t6
+; RV32IMZBS-NEXT: lw t6, 512(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t6
; RV32IMZBS-NEXT: xor a4, t1, a4
-; RV32IMZBS-NEXT: lw t1, 432(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t1, 312(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a5, a5, t1
-; RV32IMZBS-NEXT: lw t1, 448(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t1
-; RV32IMZBS-NEXT: lw t1, 456(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t1, 224(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a6, t1
+; RV32IMZBS-NEXT: lw t1, 128(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a3, a3, t1
-; RV32IMZBS-NEXT: xor t2, s10, s11
-; RV32IMZBS-NEXT: lw t1, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, ra, t1
-; RV32IMZBS-NEXT: lw t1, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, t1
-; RV32IMZBS-NEXT: lw t1, 252(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, a1, t1
-; RV32IMZBS-NEXT: lui a1, 61681
-; RV32IMZBS-NEXT: addi s4, a1, -241
-; RV32IMZBS-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s9, a1, 4
-; RV32IMZBS-NEXT: and s10, a1, s4
-; RV32IMZBS-NEXT: and s9, s9, s4
-; RV32IMZBS-NEXT: slli s10, s10, 4
-; RV32IMZBS-NEXT: or s9, s9, s10
-; RV32IMZBS-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s10, a1, 4
-; RV32IMZBS-NEXT: and s11, a1, s4
-; RV32IMZBS-NEXT: and s10, s10, s4
-; RV32IMZBS-NEXT: slli s11, s11, 4
-; RV32IMZBS-NEXT: or s10, s10, s11
-; RV32IMZBS-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, a1, s6
-; RV32IMZBS-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, s7, a1
-; RV32IMZBS-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, s8, a1
-; RV32IMZBS-NEXT: xor s1, s5, s1
-; RV32IMZBS-NEXT: lw a1, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, s2, a1
-; RV32IMZBS-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, s3, a1
-; RV32IMZBS-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s5, a1, 4
-; RV32IMZBS-NEXT: and s11, a1, s4
-; RV32IMZBS-NEXT: and s5, s5, s4
-; RV32IMZBS-NEXT: slli s11, s11, 4
-; RV32IMZBS-NEXT: or s5, s5, s11
-; RV32IMZBS-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s11, a1, 4
-; RV32IMZBS-NEXT: and ra, a1, s4
-; RV32IMZBS-NEXT: and s11, s11, s4
-; RV32IMZBS-NEXT: slli ra, ra, 4
-; RV32IMZBS-NEXT: or s11, s11, ra
-; RV32IMZBS-NEXT: lw a1, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, a1, t3
-; RV32IMZBS-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, t4, a1
-; RV32IMZBS-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, t5, a1
-; RV32IMZBS-NEXT: xor a6, s0, a6
-; RV32IMZBS-NEXT: lw a1, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, a1
-; RV32IMZBS-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, a1
+; RV32IMZBS-NEXT: xor a0, ra, a0
+; RV32IMZBS-NEXT: lw t1, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, t1
+; RV32IMZBS-NEXT: lw t1, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a2, t1
+; RV32IMZBS-NEXT: xor t6, s11, s8
+; RV32IMZBS-NEXT: lw a2, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, s9, a2
+; RV32IMZBS-NEXT: lw a2, 520(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s10, a2
+; RV32IMZBS-NEXT: xor s5, s5, s6
+; RV32IMZBS-NEXT: lw a2, 232(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s2, a2
+; RV32IMZBS-NEXT: lw a2, 140(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, a2
+; RV32IMZBS-NEXT: xor s0, s0, s1
+; RV32IMZBS-NEXT: lw a2, 236(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, a2
+; RV32IMZBS-NEXT: lw a2, 144(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, a2
+; RV32IMZBS-NEXT: xor t2, t2, t3
+; RV32IMZBS-NEXT: lw a2, 628(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, a2
+; RV32IMZBS-NEXT: lw a2, 528(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, a2
; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: lw a1, 692(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a1
-; RV32IMZBS-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a1
-; RV32IMZBS-NEXT: xor a5, t2, t6
-; RV32IMZBS-NEXT: lw a1, 716(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 244(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a6, a2
+; RV32IMZBS-NEXT: lw a2, 148(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a2
+; RV32IMZBS-NEXT: lui a2, 209715
+; RV32IMZBS-NEXT: addi s8, a2, 819
+; RV32IMZBS-NEXT: lw a2, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a6, a2, 2
+; RV32IMZBS-NEXT: and t3, a2, s8
+; RV32IMZBS-NEXT: and a6, a6, s8
+; RV32IMZBS-NEXT: slli t3, t3, 2
+; RV32IMZBS-NEXT: or a6, a6, t3
+; RV32IMZBS-NEXT: lw a2, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t3, a2, 2
+; RV32IMZBS-NEXT: and s1, a2, s8
+; RV32IMZBS-NEXT: and t3, t3, s8
+; RV32IMZBS-NEXT: slli s1, s1, 2
+; RV32IMZBS-NEXT: or t3, t3, s1
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: lw a1, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, a1
-; RV32IMZBS-NEXT: xor t2, s6, s7
-; RV32IMZBS-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, s8, a1
-; RV32IMZBS-NEXT: xor s0, s1, s2
-; RV32IMZBS-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s3, a1
-; RV32IMZBS-NEXT: xor t3, t3, t4
; RV32IMZBS-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, t5, a1
-; RV32IMZBS-NEXT: xor a6, a6, a7
-; RV32IMZBS-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, t0, a1
-; RV32IMZBS-NEXT: xor a2, a4, a2
-; RV32IMZBS-NEXT: lw a1, 464(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a1
-; RV32IMZBS-NEXT: xor a0, a5, a0
-; RV32IMZBS-NEXT: lw a1, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, t1, a1
-; RV32IMZBS-NEXT: xor a5, t2, t6
-; RV32IMZBS-NEXT: xor s0, s0, s1
-; RV32IMZBS-NEXT: xor t0, t3, t4
-; RV32IMZBS-NEXT: xor a6, a6, a7
-; RV32IMZBS-NEXT: xor a2, a2, a3
-; RV32IMZBS-NEXT: xor a0, a0, a4
-; RV32IMZBS-NEXT: lui a4, 209715
-; RV32IMZBS-NEXT: addi t5, a4, 819
-; RV32IMZBS-NEXT: srli a3, s9, 2
-; RV32IMZBS-NEXT: and a7, s9, t5
-; RV32IMZBS-NEXT: and a3, a3, t5
-; RV32IMZBS-NEXT: slli a7, a7, 2
-; RV32IMZBS-NEXT: or a3, a3, a7
-; RV32IMZBS-NEXT: srli a7, s10, 2
-; RV32IMZBS-NEXT: and t1, s10, t5
-; RV32IMZBS-NEXT: and a7, a7, t5
-; RV32IMZBS-NEXT: slli t1, t1, 2
-; RV32IMZBS-NEXT: or t1, a7, t1
-; RV32IMZBS-NEXT: lw a1, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a1
-; RV32IMZBS-NEXT: lw a1, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, s0, a1
-; RV32IMZBS-NEXT: srli t2, s5, 2
-; RV32IMZBS-NEXT: and t3, s5, t5
-; RV32IMZBS-NEXT: and t2, t2, t5
-; RV32IMZBS-NEXT: slli t3, t3, 2
-; RV32IMZBS-NEXT: or t2, t2, t3
-; RV32IMZBS-NEXT: srli t3, s11, 2
-; RV32IMZBS-NEXT: and t4, s11, t5
-; RV32IMZBS-NEXT: and t3, t3, t5
-; RV32IMZBS-NEXT: slli t4, t4, 2
-; RV32IMZBS-NEXT: or t3, t3, t4
-; RV32IMZBS-NEXT: lw a1, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, a1
-; RV32IMZBS-NEXT: lw a1, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, a1
-; RV32IMZBS-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: sw a1, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, t1, a1
+; RV32IMZBS-NEXT: xor t1, t6, s4
+; RV32IMZBS-NEXT: lw a2, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, s7, a2
+; RV32IMZBS-NEXT: lw a2, 712(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli s1, a2, 2
+; RV32IMZBS-NEXT: and s4, a2, s8
+; RV32IMZBS-NEXT: and s1, s1, s8
+; RV32IMZBS-NEXT: slli s4, s4, 2
+; RV32IMZBS-NEXT: or s1, s1, s4
+; RV32IMZBS-NEXT: lw a2, 716(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli s4, a2, 2
+; RV32IMZBS-NEXT: and s6, a2, s8
+; RV32IMZBS-NEXT: and s4, s4, s8
+; RV32IMZBS-NEXT: slli s6, s6, 2
+; RV32IMZBS-NEXT: or s4, s4, s6
+; RV32IMZBS-NEXT: xor s2, s5, s2
+; RV32IMZBS-NEXT: lw a2, 172(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, a2
+; RV32IMZBS-NEXT: xor t4, s0, t4
+; RV32IMZBS-NEXT: lw a2, 168(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, a2
+; RV32IMZBS-NEXT: xor a7, t2, a7
+; RV32IMZBS-NEXT: lw a2, 568(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, a2
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a5, 176(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a3, a5
+; RV32IMZBS-NEXT: lui a2, 349525
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, a7, a5
-; RV32IMZBS-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, a6, t0
-; RV32IMZBS-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t4, 349525
-; RV32IMZBS-NEXT: addi t4, t4, 1365
-; RV32IMZBS-NEXT: srli a0, a3, 1
-; RV32IMZBS-NEXT: and a2, a3, t4
-; RV32IMZBS-NEXT: and a0, a0, t4
-; RV32IMZBS-NEXT: slli a2, a2, 1
-; RV32IMZBS-NEXT: or a1, a0, a2
-; RV32IMZBS-NEXT: srli a2, t1, 1
-; RV32IMZBS-NEXT: and a3, t1, t4
-; RV32IMZBS-NEXT: and a2, a2, t4
-; RV32IMZBS-NEXT: slli a3, a3, 1
-; RV32IMZBS-NEXT: or a0, a2, a3
-; RV32IMZBS-NEXT: srli a3, t2, 1
-; RV32IMZBS-NEXT: and a5, t2, t4
-; RV32IMZBS-NEXT: and a3, a3, t4
-; RV32IMZBS-NEXT: slli a5, a5, 1
-; RV32IMZBS-NEXT: or t0, a3, a5
-; RV32IMZBS-NEXT: srli a3, t3, 1
-; RV32IMZBS-NEXT: and a5, t3, t4
-; RV32IMZBS-NEXT: and a3, a3, t4
-; RV32IMZBS-NEXT: slli a5, a5, 1
-; RV32IMZBS-NEXT: or s11, a3, a5
-; RV32IMZBS-NEXT: andi a2, a0, 2
-; RV32IMZBS-NEXT: sw a2, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a5, a0, 1
-; RV32IMZBS-NEXT: andi a6, a0, 4
-; RV32IMZBS-NEXT: andi t1, a0, 8
-; RV32IMZBS-NEXT: andi t2, a0, 16
-; RV32IMZBS-NEXT: andi t3, a0, 32
-; RV32IMZBS-NEXT: andi a7, a0, 64
-; RV32IMZBS-NEXT: andi a2, a0, 128
-; RV32IMZBS-NEXT: sw a2, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t6, a0, 256
-; RV32IMZBS-NEXT: andi s0, a0, 512
-; RV32IMZBS-NEXT: andi s2, a0, 1024
-; RV32IMZBS-NEXT: lw a4, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and s8, a0, a4
-; RV32IMZBS-NEXT: lui s1, 1
-; RV32IMZBS-NEXT: and s9, a0, s1
-; RV32IMZBS-NEXT: lui s1, 2
-; RV32IMZBS-NEXT: and s1, a0, s1
-; RV32IMZBS-NEXT: lui s3, 4
-; RV32IMZBS-NEXT: and s3, a0, s3
-; RV32IMZBS-NEXT: lui s5, 8
-; RV32IMZBS-NEXT: and a2, a0, s5
-; RV32IMZBS-NEXT: sw a2, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s5, 16
-; RV32IMZBS-NEXT: and s5, a0, s5
-; RV32IMZBS-NEXT: lui s6, 32
-; RV32IMZBS-NEXT: and s6, a0, s6
-; RV32IMZBS-NEXT: lui s7, 64
-; RV32IMZBS-NEXT: and s7, a0, s7
-; RV32IMZBS-NEXT: lui s10, 128
-; RV32IMZBS-NEXT: and s10, a0, s10
-; RV32IMZBS-NEXT: lui ra, 256
-; RV32IMZBS-NEXT: and ra, a0, ra
-; RV32IMZBS-NEXT: lui a3, 512
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 680(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 1024
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 2048
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 4096
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 8192
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 16384
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 32768
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 65536
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 131072
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 262144
-; RV32IMZBS-NEXT: and a2, a0, a3
-; RV32IMZBS-NEXT: sw a2, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 524288
-; RV32IMZBS-NEXT: and a2, a0, a2
-; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a5
-; RV32IMZBS-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a6
-; RV32IMZBS-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t1
-; RV32IMZBS-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t2
-; RV32IMZBS-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t3
-; RV32IMZBS-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a7
-; RV32IMZBS-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t6
-; RV32IMZBS-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s0
-; RV32IMZBS-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s2
-; RV32IMZBS-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s8
-; RV32IMZBS-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s9
-; RV32IMZBS-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s1
-; RV32IMZBS-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s3
-; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s5
-; RV32IMZBS-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s6
-; RV32IMZBS-NEXT: sw a0, 676(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s7
-; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s10
-; RV32IMZBS-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, ra
-; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: addi s5, a2, 1365
+; RV32IMZBS-NEXT: xor a1, t1, t6
+; RV32IMZBS-NEXT: srli t1, a6, 1
+; RV32IMZBS-NEXT: and a6, a6, s5
+; RV32IMZBS-NEXT: xor t2, s2, s3
+; RV32IMZBS-NEXT: srli t6, t3, 1
+; RV32IMZBS-NEXT: and t3, t3, s5
+; RV32IMZBS-NEXT: xor t4, t4, t5
+; RV32IMZBS-NEXT: srli t5, s1, 1
+; RV32IMZBS-NEXT: and s1, s1, s5
+; RV32IMZBS-NEXT: xor a7, a7, t0
+; RV32IMZBS-NEXT: srli t0, s4, 1
+; RV32IMZBS-NEXT: and s0, s4, s5
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: and a5, t1, s5
+; RV32IMZBS-NEXT: slli a6, a6, 1
+; RV32IMZBS-NEXT: and t1, t6, s5
+; RV32IMZBS-NEXT: slli t3, t3, 1
+; RV32IMZBS-NEXT: lw a2, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, a0, a2
; RV32IMZBS-NEXT: lw a0, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s2, a1, a0
+; RV32IMZBS-NEXT: and t5, t5, s5
+; RV32IMZBS-NEXT: slli s1, s1, 1
+; RV32IMZBS-NEXT: and t0, t0, s5
+; RV32IMZBS-NEXT: slli s0, s0, 1
; RV32IMZBS-NEXT: lw a0, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
+; RV32IMZBS-NEXT: xor s3, t2, a0
+; RV32IMZBS-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, a0
+; RV32IMZBS-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a7, a0
+; RV32IMZBS-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a4, a0
+; RV32IMZBS-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or a0, a5, a6
+; RV32IMZBS-NEXT: or a1, t1, t3
+; RV32IMZBS-NEXT: srli a4, t3, 31
+; RV32IMZBS-NEXT: xor a2, s2, t6
+; RV32IMZBS-NEXT: sw a2, 708(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or a3, t5, s1
+; RV32IMZBS-NEXT: or t3, t0, s0
+; RV32IMZBS-NEXT: srli s0, s0, 31
+; RV32IMZBS-NEXT: xor a2, t4, s3
+; RV32IMZBS-NEXT: sw a2, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, a0, 1
+; RV32IMZBS-NEXT: andi a6, a1, 2
+; RV32IMZBS-NEXT: slli a7, a0, 2
+; RV32IMZBS-NEXT: andi t0, a1, 4
+; RV32IMZBS-NEXT: slli t1, a0, 3
+; RV32IMZBS-NEXT: andi t4, a1, 8
+; RV32IMZBS-NEXT: slli t5, a0, 4
+; RV32IMZBS-NEXT: andi t6, a1, 16
+; RV32IMZBS-NEXT: slli s1, a0, 5
+; RV32IMZBS-NEXT: andi s2, a1, 32
+; RV32IMZBS-NEXT: slli s3, a0, 31
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a2, a4, s3
+; RV32IMZBS-NEXT: sw a2, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a3, 31
+; RV32IMZBS-NEXT: seqz s0, s0
+; RV32IMZBS-NEXT: addi s0, s0, -1
+; RV32IMZBS-NEXT: and a4, s0, a4
+; RV32IMZBS-NEXT: sw a4, 700(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a0, 6
+; RV32IMZBS-NEXT: seqz a6, a6
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and a2, a6, a5
+; RV32IMZBS-NEXT: sw a2, 688(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a5, a1, 64
+; RV32IMZBS-NEXT: seqz a6, t0
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and a2, a6, a7
+; RV32IMZBS-NEXT: sw a2, 684(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, a0, 7
+; RV32IMZBS-NEXT: seqz a7, t4
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: and a2, a7, t1
+; RV32IMZBS-NEXT: sw a2, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a7, a1, 128
+; RV32IMZBS-NEXT: seqz t0, t6
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: and a2, t0, t5
+; RV32IMZBS-NEXT: sw a2, 668(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t0, a0, 8
+; RV32IMZBS-NEXT: seqz t1, s2
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: and a2, t1, s1
+; RV32IMZBS-NEXT: sw a2, 664(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi t1, a1, 256
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a4, a5, a4
+; RV32IMZBS-NEXT: sw a4, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a0, 9
+; RV32IMZBS-NEXT: seqz a5, a7
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a2, a5, a6
+; RV32IMZBS-NEXT: sw a2, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a5, a1, 512
+; RV32IMZBS-NEXT: seqz a6, t1
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and a2, a6, t0
+; RV32IMZBS-NEXT: sw a2, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, a0, 10
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a4, a5, a4
+; RV32IMZBS-NEXT: sw a4, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a1, 1024
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a5, a1
+; RV32IMZBS-NEXT: bexti a4, a5, 11
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 11
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 12
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 12
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 13
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 13
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 14
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 14
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 15
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 15
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 656(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 16
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 16
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 604(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 17
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 17
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 596(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 18
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 18
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 19
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 19
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 20
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 20
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 21
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 21
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 22
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 22
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 584(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 23
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 23
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 24
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 24
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 588(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 25
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 25
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 592(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 26
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 26
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 27
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 27
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 28
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 28
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 612(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 29
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 29
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 576(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a1, a1, 1
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a6, a1, a0
+; RV32IMZBS-NEXT: slli a0, a0, 30
+; RV32IMZBS-NEXT: bexti a1, a5, 30
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a0, a1, a0
; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a2
-; RV32IMZBS-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t2, s11, a4
-; RV32IMZBS-NEXT: lui a0, 1
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, t3, 2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 1
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 2
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, t3, 4
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 2
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 4
-; RV32IMZBS-NEXT: and s1, s11, a0
-; RV32IMZBS-NEXT: lui a0, 8
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, t3, 8
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 3
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 32
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, t3, 16
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 4
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 64
-; RV32IMZBS-NEXT: and s6, s11, a0
-; RV32IMZBS-NEXT: lui a0, 128
-; RV32IMZBS-NEXT: and s7, s11, a0
-; RV32IMZBS-NEXT: lui a0, 256
-; RV32IMZBS-NEXT: and s10, s11, a0
-; RV32IMZBS-NEXT: lui a0, 512
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, t3, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 5
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 1024
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 2048
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 4096
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 8192
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 16384
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 32768
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 65536
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 131072
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, s11, a3
-; RV32IMZBS-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and s2, s11, a0
-; RV32IMZBS-NEXT: andi s8, s11, 2
-; RV32IMZBS-NEXT: andi a0, s11, 1
-; RV32IMZBS-NEXT: andi a1, s11, 4
-; RV32IMZBS-NEXT: andi a2, s11, 8
-; RV32IMZBS-NEXT: andi a3, s11, 16
-; RV32IMZBS-NEXT: andi a4, s11, 32
-; RV32IMZBS-NEXT: andi a5, s11, 64
-; RV32IMZBS-NEXT: andi a6, s11, 128
-; RV32IMZBS-NEXT: andi a7, s11, 256
-; RV32IMZBS-NEXT: andi t1, s11, 512
-; RV32IMZBS-NEXT: andi s11, s11, 1024
-; RV32IMZBS-NEXT: mul ra, t0, s8
-; RV32IMZBS-NEXT: mul s8, t0, a0
-; RV32IMZBS-NEXT: mul s9, t0, a1
-; RV32IMZBS-NEXT: mul s3, t0, a2
-; RV32IMZBS-NEXT: mul s5, t0, a3
-; RV32IMZBS-NEXT: mul s0, t0, a4
-; RV32IMZBS-NEXT: mul a0, t0, a5
+; RV32IMZBS-NEXT: andi a0, t3, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 6
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t3, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 7
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t3, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 8
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t0, a6
-; RV32IMZBS-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t6, t0, a7
-; RV32IMZBS-NEXT: mul t3, t0, t1
-; RV32IMZBS-NEXT: mul a0, t0, s11
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t0, t2
+; RV32IMZBS-NEXT: andi a0, t3, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 9
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t3, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 10
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a0, t3
+; RV32IMZBS-NEXT: bexti a1, a0, 11
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 11
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 12
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli s0, a3, 12
+; RV32IMZBS-NEXT: and a1, a1, s0
+; RV32IMZBS-NEXT: sw a1, 504(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 13
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 13
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 520(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 14
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 14
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 15
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 15
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 532(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 16
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 16
+; RV32IMZBS-NEXT: and s9, a1, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 17
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 17
+; RV32IMZBS-NEXT: and s6, a1, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 18
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 18
+; RV32IMZBS-NEXT: and s10, a1, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 19
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 19
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 500(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 20
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli s1, a3, 20
+; RV32IMZBS-NEXT: and a1, a1, s1
+; RV32IMZBS-NEXT: sw a1, 508(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 21
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 21
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 512(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 22
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 22
+; RV32IMZBS-NEXT: and s0, a1, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 23
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 23
+; RV32IMZBS-NEXT: and t6, a1, a4
+; RV32IMZBS-NEXT: bexti a4, a0, 24
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, a3, 24
+; RV32IMZBS-NEXT: and s1, a4, a5
+; RV32IMZBS-NEXT: bexti a5, a0, 25
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a7, a3, 25
+; RV32IMZBS-NEXT: and s2, a5, a7
+; RV32IMZBS-NEXT: bexti a7, a0, 26
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: slli t0, a3, 26
+; RV32IMZBS-NEXT: and s3, a7, t0
+; RV32IMZBS-NEXT: bexti t0, a0, 27
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: slli t1, a3, 27
+; RV32IMZBS-NEXT: and s7, t0, t1
+; RV32IMZBS-NEXT: bexti t1, a0, 28
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: slli t2, a3, 28
+; RV32IMZBS-NEXT: and s4, t1, t2
+; RV32IMZBS-NEXT: bexti t2, a0, 29
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: slli a2, a3, 29
+; RV32IMZBS-NEXT: and t5, t2, a2
+; RV32IMZBS-NEXT: andi t2, t3, 1
+; RV32IMZBS-NEXT: seqz t2, t2
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: and t2, t2, a3
+; RV32IMZBS-NEXT: slli a3, a3, 30
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: and t4, a0, a3
+; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a6, a0
+; RV32IMZBS-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a0, a1
+; RV32IMZBS-NEXT: lw a0, 668(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a0, a1
+; RV32IMZBS-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a0, a1
+; RV32IMZBS-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, a0, a1
+; RV32IMZBS-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 596(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a0, a3
+; RV32IMZBS-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a0, a4
+; RV32IMZBS-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a0, a5
; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor t2, t2, a0
; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t1, t0, a0
-; RV32IMZBS-NEXT: mul s1, t0, s1
-; RV32IMZBS-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t0, s6
-; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s6, t0, s7
-; RV32IMZBS-NEXT: mul s7, t0, s10
-; RV32IMZBS-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s10, t0, a0
-; RV32IMZBS-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a7, t0, a0
-; RV32IMZBS-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a6, t0, a0
-; RV32IMZBS-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t2, t0, a0
-; RV32IMZBS-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s11, t0, a0
-; RV32IMZBS-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t0, a0
-; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t0, s2
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 628(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: lw a1, 616(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 544(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a3, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a3
-; RV32IMZBS-NEXT: lw a3, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a4
-; RV32IMZBS-NEXT: lw a4, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a5, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: lw a5, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw t0, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, t0
-; RV32IMZBS-NEXT: xor s8, s8, ra
-; RV32IMZBS-NEXT: xor s3, s9, s3
-; RV32IMZBS-NEXT: xor s0, s5, s0
-; RV32IMZBS-NEXT: xor t3, t6, t3
-; RV32IMZBS-NEXT: xor t1, t1, s1
-; RV32IMZBS-NEXT: xor t6, s6, s7
-; RV32IMZBS-NEXT: xor a6, a7, a6
-; RV32IMZBS-NEXT: xor a0, s2, a0
-; RV32IMZBS-NEXT: lw a7, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a7
-; RV32IMZBS-NEXT: lw a7, 644(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a7
+; RV32IMZBS-NEXT: lw a2, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw ra, 528(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, ra
+; RV32IMZBS-NEXT: lw ra, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s11, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, ra, s11
+; RV32IMZBS-NEXT: xor s6, s9, s6
+; RV32IMZBS-NEXT: xor t6, s0, t6
+; RV32IMZBS-NEXT: xor t4, t5, t4
+; RV32IMZBS-NEXT: xor t1, t1, t3
+; RV32IMZBS-NEXT: lw t3, 680(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a6, t3
+; RV32IMZBS-NEXT: lw t3, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t3
+; RV32IMZBS-NEXT: lw t3, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t3
+; RV32IMZBS-NEXT: lw t3, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, t3
+; RV32IMZBS-NEXT: lw t3, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t3
+; RV32IMZBS-NEXT: lw t3, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, t3
+; RV32IMZBS-NEXT: xor a0, t2, a0
+; RV32IMZBS-NEXT: lw t2, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, t2
+; RV32IMZBS-NEXT: lw t2, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, t2
+; RV32IMZBS-NEXT: lw t2, 520(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, ra, t2
+; RV32IMZBS-NEXT: xor t3, s6, s10
+; RV32IMZBS-NEXT: xor t5, t6, s1
+; RV32IMZBS-NEXT: lw t6, 700(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, t6
+; RV32IMZBS-NEXT: xor a6, t1, a6
+; RV32IMZBS-NEXT: lw t1, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t1
+; RV32IMZBS-NEXT: lw t1, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t1
+; RV32IMZBS-NEXT: lw t1, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, t1
+; RV32IMZBS-NEXT: lw t1, 592(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t1
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: lw a2, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, t2, a2
+; RV32IMZBS-NEXT: lw t1, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t3, t1
+; RV32IMZBS-NEXT: xor t2, t5, s2
+; RV32IMZBS-NEXT: xor a6, a6, a7
+; RV32IMZBS-NEXT: lw a7, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, t0, a7
+; RV32IMZBS-NEXT: lw t0, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, t0
+; RV32IMZBS-NEXT: lw t0, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t0
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: lw a2, 508(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, t1, a2
+; RV32IMZBS-NEXT: xor t0, t2, s3
+; RV32IMZBS-NEXT: xor a6, a6, a7
; RV32IMZBS-NEXT: lw a7, 636(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a3, a3, a7
; RV32IMZBS-NEXT: lw a7, 608(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a4, a4, a7
-; RV32IMZBS-NEXT: lw a7, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a7
-; RV32IMZBS-NEXT: xor a7, s8, s3
-; RV32IMZBS-NEXT: lw t0, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, s0, t0
-; RV32IMZBS-NEXT: lw t0, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t3, t0
-; RV32IMZBS-NEXT: lw t0, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, t0
-; RV32IMZBS-NEXT: xor t6, t6, s10
-; RV32IMZBS-NEXT: xor a6, a6, t2
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: lw a2, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: lw a3, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: lw a4, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a5, a4
-; RV32IMZBS-NEXT: xor a5, a7, s0
-; RV32IMZBS-NEXT: lw a7, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, t3, a7
-; RV32IMZBS-NEXT: lw t0, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, t0
-; RV32IMZBS-NEXT: lw t0, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, t6, t0
-; RV32IMZBS-NEXT: xor a6, a6, s11
-; RV32IMZBS-NEXT: lw t3, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, t3
-; RV32IMZBS-NEXT: lw t3, 692(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, t3
-; RV32IMZBS-NEXT: lw t3, 676(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t3
-; RV32IMZBS-NEXT: lw t3, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, t3
-; RV32IMZBS-NEXT: lw t3, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, t3
-; RV32IMZBS-NEXT: lw t0, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, t0
-; RV32IMZBS-NEXT: lw t0, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, t0
-; RV32IMZBS-NEXT: lw t0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, t0
-; RV32IMZBS-NEXT: lw t0, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, t2, t0
-; RV32IMZBS-NEXT: lw t0, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, t0
-; RV32IMZBS-NEXT: lw t3, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t3
-; RV32IMZBS-NEXT: lw t3, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, t3
-; RV32IMZBS-NEXT: lw t0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, t0
-; RV32IMZBS-NEXT: lw t0, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, t0
-; RV32IMZBS-NEXT: xor a1, a0, a1
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a4, a2
-; RV32IMZBS-NEXT: xor a4, a5, a7
-; RV32IMZBS-NEXT: xor a4, a4, t1
-; RV32IMZBS-NEXT: lw a7, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, a7
-; RV32IMZBS-NEXT: xor a1, a1, a3
-; RV32IMZBS-NEXT: lw a3, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a3
-; RV32IMZBS-NEXT: xor a3, a4, t2
-; RV32IMZBS-NEXT: lw a4, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a6, a4
-; RV32IMZBS-NEXT: lw t0, 728(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a6, a1, t0
-; RV32IMZBS-NEXT: srli a7, a1, 8
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: and a2, a3, t0
-; RV32IMZBS-NEXT: xor a4, a3, a4
-; RV32IMZBS-NEXT: srli a3, a3, 8
-; RV32IMZBS-NEXT: and a7, a7, t0
-; RV32IMZBS-NEXT: and a3, a3, t0
+; RV32IMZBS-NEXT: xor a2, t0, s7
+; RV32IMZBS-NEXT: xor a3, a6, a3
+; RV32IMZBS-NEXT: lw a6, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a6
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: xor a1, a2, s4
+; RV32IMZBS-NEXT: xor a3, a3, a4
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: xor a3, a3, a5
+; RV32IMZBS-NEXT: xor a0, a0, t4
+; RV32IMZBS-NEXT: srli a1, a3, 8
+; RV32IMZBS-NEXT: srli a2, a3, 24
+; RV32IMZBS-NEXT: slli a4, a3, 24
+; RV32IMZBS-NEXT: lw a5, 720(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: or a1, a1, a2
+; RV32IMZBS-NEXT: srli a2, a0, 8
+; RV32IMZBS-NEXT: slli a3, a3, 8
+; RV32IMZBS-NEXT: or a3, a4, a3
+; RV32IMZBS-NEXT: srli a4, a0, 24
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: or a2, a2, a4
+; RV32IMZBS-NEXT: and a4, a0, a5
; RV32IMZBS-NEXT: slli a0, a0, 24
-; RV32IMZBS-NEXT: slli a6, a6, 8
-; RV32IMZBS-NEXT: or a0, a0, a6
-; RV32IMZBS-NEXT: srli a1, a1, 24
-; RV32IMZBS-NEXT: or a1, a7, a1
-; RV32IMZBS-NEXT: slli a5, a5, 24
-; RV32IMZBS-NEXT: slli a2, a2, 8
-; RV32IMZBS-NEXT: or a2, a5, a2
-; RV32IMZBS-NEXT: srli a4, a4, 24
-; RV32IMZBS-NEXT: or a3, a3, a4
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: or a2, a2, a3
-; RV32IMZBS-NEXT: srli a1, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, s4
-; RV32IMZBS-NEXT: srli a3, a2, 4
-; RV32IMZBS-NEXT: and a2, a2, s4
-; RV32IMZBS-NEXT: and a1, a1, s4
-; RV32IMZBS-NEXT: and a3, a3, s4
+; RV32IMZBS-NEXT: slli a4, a4, 8
+; RV32IMZBS-NEXT: or a0, a0, a4
+; RV32IMZBS-NEXT: or a1, a3, a1
+; RV32IMZBS-NEXT: or a0, a0, a2
+; RV32IMZBS-NEXT: srli a2, a1, 4
+; RV32IMZBS-NEXT: lw a4, 696(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: srli a3, a0, 4
+; RV32IMZBS-NEXT: and a0, a0, a4
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: slli a1, a1, 4
; RV32IMZBS-NEXT: slli a0, a0, 4
-; RV32IMZBS-NEXT: slli a2, a2, 4
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: or a2, a3, a2
-; RV32IMZBS-NEXT: srli a1, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, t5
-; RV32IMZBS-NEXT: srli a3, a2, 2
-; RV32IMZBS-NEXT: and a2, a2, t5
-; RV32IMZBS-NEXT: and a1, a1, t5
-; RV32IMZBS-NEXT: and a3, a3, t5
+; RV32IMZBS-NEXT: or a1, a2, a1
+; RV32IMZBS-NEXT: or a0, a3, a0
+; RV32IMZBS-NEXT: srli a2, a1, 2
+; RV32IMZBS-NEXT: and a1, a1, s8
+; RV32IMZBS-NEXT: srli a3, a0, 2
+; RV32IMZBS-NEXT: and a0, a0, s8
+; RV32IMZBS-NEXT: and a2, a2, s8
+; RV32IMZBS-NEXT: and a3, a3, s8
+; RV32IMZBS-NEXT: slli a1, a1, 2
+; RV32IMZBS-NEXT: or a1, a2, a1
+; RV32IMZBS-NEXT: lui a2, 349525
+; RV32IMZBS-NEXT: addi a2, a2, 1364
; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: lui a1, 349525
-; RV32IMZBS-NEXT: addi a1, a1, 1364
-; RV32IMZBS-NEXT: slli a2, a2, 2
-; RV32IMZBS-NEXT: or a2, a3, a2
-; RV32IMZBS-NEXT: srli a3, a0, 1
-; RV32IMZBS-NEXT: and a0, a0, t4
-; RV32IMZBS-NEXT: and a4, a2, t4
-; RV32IMZBS-NEXT: srli a2, a2, 1
-; RV32IMZBS-NEXT: and a3, a3, a1
-; RV32IMZBS-NEXT: and a1, a2, a1
-; RV32IMZBS-NEXT: slli a0, a0, 1
; RV32IMZBS-NEXT: or a0, a3, a0
-; RV32IMZBS-NEXT: slli a4, a4, 1
-; RV32IMZBS-NEXT: or a1, a1, a4
+; RV32IMZBS-NEXT: srli a3, a1, 1
+; RV32IMZBS-NEXT: and a1, a1, s5
+; RV32IMZBS-NEXT: and a4, a0, s5
; RV32IMZBS-NEXT: srli a0, a0, 1
-; RV32IMZBS-NEXT: lw a2, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a2
+; RV32IMZBS-NEXT: and a3, a3, a2
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: slli a1, a1, 1
+; RV32IMZBS-NEXT: or a1, a3, a1
+; RV32IMZBS-NEXT: slli a4, a4, 1
+; RV32IMZBS-NEXT: or a0, a0, a4
; RV32IMZBS-NEXT: srli a1, a1, 1
-; RV32IMZBS-NEXT: lw a2, 712(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 708(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 716(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a0, a0, 1
+; RV32IMZBS-NEXT: lw a2, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a2
+; RV32IMZBS-NEXT: lw a2, 724(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 712(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: sw a4, 0(a2)
-; RV32IMZBS-NEXT: sw a0, 4(a2)
-; RV32IMZBS-NEXT: lw a3, 720(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a1, 4(a2)
+; RV32IMZBS-NEXT: lw a3, 716(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: sw a3, 8(a2)
-; RV32IMZBS-NEXT: sw a1, 12(a2)
-; RV32IMZBS-NEXT: lw a2, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 12(a2)
+; RV32IMZBS-NEXT: lw a2, 728(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: sw a4, 0(a2)
-; RV32IMZBS-NEXT: sw a0, 4(a2)
+; RV32IMZBS-NEXT: sw a1, 4(a2)
; RV32IMZBS-NEXT: sw a3, 8(a2)
-; RV32IMZBS-NEXT: sw a1, 12(a2)
+; RV32IMZBS-NEXT: sw a0, 12(a2)
; RV32IMZBS-NEXT: lw ra, 780(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw s0, 776(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw s1, 772(sp) # 4-byte Folded Reload
@@ -14203,937 +15695,936 @@ define void @commutative_clmul_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %p
;
; RV64IMZBS-LABEL: commutative_clmul_v2i64:
; RV64IMZBS: # %bb.0:
-; RV64IMZBS-NEXT: addi sp, sp, -1120
-; RV64IMZBS-NEXT: sd ra, 1112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s0, 1104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s1, 1096(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s2, 1088(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s3, 1080(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s4, 1072(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s5, 1064(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s6, 1056(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s7, 1048(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s8, 1040(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s9, 1032(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s10, 1024(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s11, 1016(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd a5, 848(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd a4, 840(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi t4, a2, 2
-; RV64IMZBS-NEXT: andi t6, a2, 1
-; RV64IMZBS-NEXT: andi a7, a2, 4
-; RV64IMZBS-NEXT: andi t2, a2, 8
-; RV64IMZBS-NEXT: andi a6, a2, 16
-; RV64IMZBS-NEXT: andi t0, a2, 32
-; RV64IMZBS-NEXT: andi a4, a2, 64
-; RV64IMZBS-NEXT: andi t3, a2, 128
-; RV64IMZBS-NEXT: andi t5, a2, 256
-; RV64IMZBS-NEXT: andi a5, a2, 512
-; RV64IMZBS-NEXT: andi t1, a2, 1024
-; RV64IMZBS-NEXT: bseti s0, zero, 11
-; RV64IMZBS-NEXT: sd s0, 856(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t4, a0, t4
-; RV64IMZBS-NEXT: mul t6, a0, t6
-; RV64IMZBS-NEXT: xor t4, t6, t4
-; RV64IMZBS-NEXT: sd t4, 832(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a7, a0, a7
-; RV64IMZBS-NEXT: mul t2, a0, t2
-; RV64IMZBS-NEXT: xor a7, a7, t2
-; RV64IMZBS-NEXT: sd a7, 816(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a6, a0, a6
-; RV64IMZBS-NEXT: mul a7, a0, t0
-; RV64IMZBS-NEXT: xor a6, a6, a7
-; RV64IMZBS-NEXT: sd a6, 808(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a6, a0, t3
-; RV64IMZBS-NEXT: mul a7, a0, t5
-; RV64IMZBS-NEXT: xor a6, a6, a7
-; RV64IMZBS-NEXT: sd a6, 800(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 792(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, a5
-; RV64IMZBS-NEXT: sd a4, 784(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, t1
-; RV64IMZBS-NEXT: sd a4, 824(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s10, zero, 31
-; RV64IMZBS-NEXT: sd s10, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s11, zero, 32
-; RV64IMZBS-NEXT: sd s11, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti ra, zero, 33
-; RV64IMZBS-NEXT: sd ra, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 34
-; RV64IMZBS-NEXT: sd a4, 1008(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 35
-; RV64IMZBS-NEXT: sd a4, 1000(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 36
-; RV64IMZBS-NEXT: sd a4, 992(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 37
-; RV64IMZBS-NEXT: sd a4, 984(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 38
-; RV64IMZBS-NEXT: sd a4, 976(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 39
-; RV64IMZBS-NEXT: sd a4, 968(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 40
-; RV64IMZBS-NEXT: sd a4, 960(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 41
-; RV64IMZBS-NEXT: sd a4, 952(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 42
-; RV64IMZBS-NEXT: sd a4, 944(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 43
-; RV64IMZBS-NEXT: sd a4, 936(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 44
-; RV64IMZBS-NEXT: sd a4, 928(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 45
+; RV64IMZBS-NEXT: addi sp, sp, -1040
+; RV64IMZBS-NEXT: sd ra, 1032(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s0, 1024(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 1016(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 1008(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s3, 1000(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s4, 992(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s5, 984(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s6, 976(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s7, 968(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s8, 960(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s9, 952(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s10, 944(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s11, 936(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd a5, 928(sp) # 8-byte Folded Spill
; RV64IMZBS-NEXT: sd a4, 920(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 46
-; RV64IMZBS-NEXT: sd a4, 912(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 47
-; RV64IMZBS-NEXT: sd a4, 904(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 48
-; RV64IMZBS-NEXT: sd a4, 896(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 49
-; RV64IMZBS-NEXT: sd a4, 888(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 50
-; RV64IMZBS-NEXT: sd a4, 880(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 51
-; RV64IMZBS-NEXT: sd a4, 872(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s9, zero, 52
-; RV64IMZBS-NEXT: sd s9, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 53
-; RV64IMZBS-NEXT: sd a4, 864(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s0, zero, 54
-; RV64IMZBS-NEXT: bseti a4, zero, 55
-; RV64IMZBS-NEXT: sd a4, 160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s1, zero, 56
-; RV64IMZBS-NEXT: bseti s2, zero, 57
-; RV64IMZBS-NEXT: bseti s3, zero, 58
-; RV64IMZBS-NEXT: bseti s4, zero, 59
-; RV64IMZBS-NEXT: bseti s5, zero, 60
-; RV64IMZBS-NEXT: bseti s6, zero, 61
-; RV64IMZBS-NEXT: bseti s7, zero, 62
-; RV64IMZBS-NEXT: bseti s8, zero, 63
-; RV64IMZBS-NEXT: ld a4, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 776(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 1
-; RV64IMZBS-NEXT: and a5, a2, a5
-; RV64IMZBS-NEXT: lui a6, 2
-; RV64IMZBS-NEXT: and a6, a2, a6
-; RV64IMZBS-NEXT: lui a7, 4
-; RV64IMZBS-NEXT: and a7, a2, a7
-; RV64IMZBS-NEXT: lui t0, 8
-; RV64IMZBS-NEXT: and t0, a2, t0
-; RV64IMZBS-NEXT: lui t1, 16
-; RV64IMZBS-NEXT: and t1, a2, t1
-; RV64IMZBS-NEXT: lui t2, 32
-; RV64IMZBS-NEXT: and t2, a2, t2
-; RV64IMZBS-NEXT: lui t3, 64
-; RV64IMZBS-NEXT: and t3, a2, t3
-; RV64IMZBS-NEXT: lui t4, 128
-; RV64IMZBS-NEXT: and t4, a2, t4
-; RV64IMZBS-NEXT: lui t5, 256
-; RV64IMZBS-NEXT: and t5, a2, t5
-; RV64IMZBS-NEXT: lui t6, 512
-; RV64IMZBS-NEXT: and t6, a2, t6
-; RV64IMZBS-NEXT: lui a4, 1024
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 768(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 2048
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 760(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 4096
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 752(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 8192
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 744(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 16384
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 736(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 32768
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 728(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 65536
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 720(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 131072
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 712(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 262144
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s10
-; RV64IMZBS-NEXT: sd a4, 688(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s10, a2, s11
-; RV64IMZBS-NEXT: and s11, a2, ra
-; RV64IMZBS-NEXT: ld ra, 1008(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and ra, a2, ra
-; RV64IMZBS-NEXT: ld a4, 1000(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 680(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 992(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 664(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 984(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 648(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 976(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 616(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 968(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 600(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 960(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 584(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 952(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 544(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 936(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 488(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 456(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 912(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 904(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 896(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 880(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 872(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s9
-; RV64IMZBS-NEXT: sd a4, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 864(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s0
-; RV64IMZBS-NEXT: sd a4, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s9, s0
-; RV64IMZBS-NEXT: ld s0, 160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, s0
-; RV64IMZBS-NEXT: sd a4, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s1
-; RV64IMZBS-NEXT: sd a4, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s2
-; RV64IMZBS-NEXT: sd a4, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s3
-; RV64IMZBS-NEXT: sd a4, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s4
-; RV64IMZBS-NEXT: sd a4, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s5
-; RV64IMZBS-NEXT: sd a4, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s6
-; RV64IMZBS-NEXT: sd a4, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s7
-; RV64IMZBS-NEXT: sd a4, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a2, s8
-; RV64IMZBS-NEXT: ld a4, 776(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 512(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, a5
-; RV64IMZBS-NEXT: sd a4, 504(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, a6
-; RV64IMZBS-NEXT: sd a4, 576(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, a7
-; RV64IMZBS-NEXT: sd a4, 640(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t0, a0, t0
-; RV64IMZBS-NEXT: mul a4, a0, t1
-; RV64IMZBS-NEXT: sd a4, 496(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, t2
-; RV64IMZBS-NEXT: sd a4, 560(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, t3
-; RV64IMZBS-NEXT: sd a4, 624(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, t4
-; RV64IMZBS-NEXT: sd a4, 704(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t5, a0, t5
-; RV64IMZBS-NEXT: mul t6, a0, t6
-; RV64IMZBS-NEXT: ld a4, 768(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 552(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 760(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 752(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 672(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 744(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 744(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 736(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 472(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 728(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 464(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 720(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 536(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 712(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 592(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 696(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 656(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 688(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 712(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, s10
-; RV64IMZBS-NEXT: sd a4, 760(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, s11
-; RV64IMZBS-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, ra
-; RV64IMZBS-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 680(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 520(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 664(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 568(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 648(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 616(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 680(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 600(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 728(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 584(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 776(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 544(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 424(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 528(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 488(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 480(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 456(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 544(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 432(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 600(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 408(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 648(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 392(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 736(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 768(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 456(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 584(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 616(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 664(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 688(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 720(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 752(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a0, a0, a2
-; RV64IMZBS-NEXT: sd a0, 488(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a0, a3, 2
-; RV64IMZBS-NEXT: mul a0, a1, a0
-; RV64IMZBS-NEXT: andi a2, a3, 1
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: xor a0, a2, a0
-; RV64IMZBS-NEXT: sd a0, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a0, a3, 4
-; RV64IMZBS-NEXT: mul a0, a1, a0
-; RV64IMZBS-NEXT: andi a2, a3, 8
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: xor a0, a0, a2
-; RV64IMZBS-NEXT: sd a0, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a0, a3, 16
-; RV64IMZBS-NEXT: mul a0, a1, a0
-; RV64IMZBS-NEXT: andi a2, a3, 32
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: xor a0, a0, a2
-; RV64IMZBS-NEXT: sd a0, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a0, a3, 128
-; RV64IMZBS-NEXT: mul a0, a1, a0
-; RV64IMZBS-NEXT: andi a2, a3, 256
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: xor a0, a0, a2
-; RV64IMZBS-NEXT: sd a0, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and t3, a3, a0
-; RV64IMZBS-NEXT: lui a0, 1
+; RV64IMZBS-NEXT: mv t3, a3
+; RV64IMZBS-NEXT: mv t0, a1
+; RV64IMZBS-NEXT: slli t6, a0, 1
+; RV64IMZBS-NEXT: andi s6, a2, 2
+; RV64IMZBS-NEXT: slli s8, a0, 2
+; RV64IMZBS-NEXT: andi s10, a2, 4
+; RV64IMZBS-NEXT: slli s5, a0, 3
+; RV64IMZBS-NEXT: andi s9, a2, 8
+; RV64IMZBS-NEXT: slli s3, a0, 4
+; RV64IMZBS-NEXT: andi s7, a2, 16
+; RV64IMZBS-NEXT: slli s0, a0, 5
+; RV64IMZBS-NEXT: andi s4, a2, 32
+; RV64IMZBS-NEXT: slli a3, a0, 6
+; RV64IMZBS-NEXT: andi s1, a2, 64
+; RV64IMZBS-NEXT: slli t4, a0, 7
+; RV64IMZBS-NEXT: andi s2, a2, 128
+; RV64IMZBS-NEXT: slli a1, a0, 8
+; RV64IMZBS-NEXT: andi t5, a2, 256
+; RV64IMZBS-NEXT: slli a6, a0, 9
+; RV64IMZBS-NEXT: andi t2, a2, 512
+; RV64IMZBS-NEXT: slli a5, a0, 10
+; RV64IMZBS-NEXT: andi a7, a2, 1024
+; RV64IMZBS-NEXT: slli t1, a0, 11
+; RV64IMZBS-NEXT: not a4, a2
+; RV64IMZBS-NEXT: seqz s6, s6
+; RV64IMZBS-NEXT: addi s6, s6, -1
+; RV64IMZBS-NEXT: and t6, s6, t6
+; RV64IMZBS-NEXT: sd t6, 912(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli t6, a0, 12
+; RV64IMZBS-NEXT: seqz s6, s10
+; RV64IMZBS-NEXT: addi s6, s6, -1
+; RV64IMZBS-NEXT: and s6, s6, s8
+; RV64IMZBS-NEXT: sd s6, 904(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli s6, a0, 13
+; RV64IMZBS-NEXT: seqz s8, s9
+; RV64IMZBS-NEXT: addi s8, s8, -1
+; RV64IMZBS-NEXT: and s5, s8, s5
+; RV64IMZBS-NEXT: sd s5, 896(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli s5, a0, 14
+; RV64IMZBS-NEXT: seqz s7, s7
+; RV64IMZBS-NEXT: addi s7, s7, -1
+; RV64IMZBS-NEXT: and s3, s7, s3
+; RV64IMZBS-NEXT: sd s3, 880(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli s3, a0, 15
+; RV64IMZBS-NEXT: seqz s4, s4
+; RV64IMZBS-NEXT: addi s4, s4, -1
+; RV64IMZBS-NEXT: and s4, s4, s0
+; RV64IMZBS-NEXT: slli s0, a0, 16
+; RV64IMZBS-NEXT: seqz s1, s1
+; RV64IMZBS-NEXT: addi s1, s1, -1
+; RV64IMZBS-NEXT: and a3, s1, a3
+; RV64IMZBS-NEXT: sd a3, 888(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a3, a0, 17
+; RV64IMZBS-NEXT: seqz s1, s2
+; RV64IMZBS-NEXT: addi s1, s1, -1
+; RV64IMZBS-NEXT: and s2, s1, t4
+; RV64IMZBS-NEXT: slli t4, a0, 18
+; RV64IMZBS-NEXT: seqz t5, t5
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: and t5, t5, a1
+; RV64IMZBS-NEXT: slli a1, a0, 19
+; RV64IMZBS-NEXT: seqz t2, t2
+; RV64IMZBS-NEXT: addi t2, t2, -1
+; RV64IMZBS-NEXT: and a6, t2, a6
+; RV64IMZBS-NEXT: sd a6, 848(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a6, a0, 20
+; RV64IMZBS-NEXT: seqz a7, a7
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: and a5, a7, a5
+; RV64IMZBS-NEXT: sd a5, 864(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a5, a4, 11
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and t1, a5, t1
+; RV64IMZBS-NEXT: bexti a5, a4, 12
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and t2, a5, t6
+; RV64IMZBS-NEXT: bexti a5, a4, 13
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a5, a5, s6
+; RV64IMZBS-NEXT: sd a5, 816(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a5, a4, 14
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a5, a5, s5
+; RV64IMZBS-NEXT: sd a5, 840(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a5, a4, 15
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a5, a5, s3
+; RV64IMZBS-NEXT: sd a5, 872(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a5, a4, 16
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and s3, a5, s0
+; RV64IMZBS-NEXT: bexti a5, a4, 17
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a3, a5, a3
+; RV64IMZBS-NEXT: sd a3, 768(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 18
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, t4
+; RV64IMZBS-NEXT: sd a3, 792(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 19
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a1, a3, a1
+; RV64IMZBS-NEXT: sd a1, 824(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 20
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a1, a3, a6
+; RV64IMZBS-NEXT: sd a1, 832(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 21
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 21
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 856(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 22
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 22
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 728(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 23
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 23
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 720(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 24
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 24
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 752(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 25
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 25
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 776(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 26
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 26
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 784(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 27
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 27
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 800(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 28
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 28
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 808(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 29
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 29
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 664(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 30
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 30
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 656(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sraiw a3, a2, 31
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 31
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 688(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 32
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 32
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 704(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 33
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 33
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 712(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 34
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 34
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 736(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 35
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 35
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 744(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 36
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 36
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 760(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 37
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 37
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 592(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 38
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 38
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 576(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 39
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 39
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 624(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 40
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 40
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 632(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 41
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 41
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 640(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 42
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 42
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 648(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 43
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 43
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 672(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 44
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 44
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 680(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 45
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 45
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 696(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 46
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 46
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 536(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 47
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 47
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 528(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 48
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 48
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 544(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 49
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 49
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 552(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 50
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 50
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 560(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 51
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 51
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 568(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 52
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 52
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 584(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 53
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 53
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 600(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 54
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 54
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 616(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 55
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 55
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 608(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 56
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 56
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 472(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 57
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 57
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 464(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 58
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 58
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 480(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 59
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 59
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 488(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 60
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 60
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 496(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 61
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 61
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 504(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 62
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a0, 62
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 512(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a3, a2, 1
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: addi a3, a3, -1
; RV64IMZBS-NEXT: and t4, a3, a0
-; RV64IMZBS-NEXT: lui a0, 2
-; RV64IMZBS-NEXT: and s10, a3, a0
-; RV64IMZBS-NEXT: lui a0, 4
-; RV64IMZBS-NEXT: and s11, a3, a0
-; RV64IMZBS-NEXT: lui a0, 8
-; RV64IMZBS-NEXT: and ra, a3, a0
-; RV64IMZBS-NEXT: lui a0, 16
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 856(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 32
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 64
-; RV64IMZBS-NEXT: and a4, a3, a4
-; RV64IMZBS-NEXT: lui a0, 128
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 256
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 512
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 1024
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a2, 2048
-; RV64IMZBS-NEXT: and a2, a3, a2
-; RV64IMZBS-NEXT: lui a0, 4096
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 8192
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 16384
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 32768
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 65536
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 240(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 131072
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 232(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 262144
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 224(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 1008(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 1008(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 1000(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 216(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 992(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 992(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 984(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 208(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 976(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 976(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 968(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 968(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 960(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 960(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 952(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 944(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 936(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 928(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 920(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 912(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 904(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 176(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 896(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 888(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 880(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 872(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 864(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a0, a3, s9
-; RV64IMZBS-NEXT: sd a0, 104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s0, a3, s0
-; RV64IMZBS-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s1, a3, s1
-; RV64IMZBS-NEXT: and a0, a3, s2
-; RV64IMZBS-NEXT: and s9, a3, s3
-; RV64IMZBS-NEXT: and a5, a3, s4
-; RV64IMZBS-NEXT: and s3, a3, s5
-; RV64IMZBS-NEXT: and s0, a3, s6
-; RV64IMZBS-NEXT: and a7, a3, s7
-; RV64IMZBS-NEXT: and t2, a3, s8
-; RV64IMZBS-NEXT: andi a6, a3, 64
-; RV64IMZBS-NEXT: andi t1, a3, 512
-; RV64IMZBS-NEXT: andi a3, a3, 1024
-; RV64IMZBS-NEXT: mul a6, a1, a6
-; RV64IMZBS-NEXT: sd a6, 88(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a6, a1, t1
-; RV64IMZBS-NEXT: sd a6, 80(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, t3
-; RV64IMZBS-NEXT: sd a3, 72(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, t4
-; RV64IMZBS-NEXT: sd a3, 64(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, s10
-; RV64IMZBS-NEXT: sd a3, 152(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, s11
-; RV64IMZBS-NEXT: sd a3, 880(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, ra
-; RV64IMZBS-NEXT: sd a3, 56(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, a4
-; RV64IMZBS-NEXT: sd a3, 864(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 936(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a2, a1, a2
+; RV64IMZBS-NEXT: slli a0, a0, 63
+; RV64IMZBS-NEXT: srli a2, a2, 63
+; RV64IMZBS-NEXT: seqz a2, a2
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: and a0, a2, a0
+; RV64IMZBS-NEXT: sd a0, 520(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 2
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 1
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 456(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 4
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 2
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 448(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 8
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 3
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 440(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 16
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 4
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 424(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 32
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 5
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 408(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 64
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 6
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 432(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 128
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 7
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 384(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 256
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 8
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 376(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 512
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 9
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 400(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 1024
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 10
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 416(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: not a0, t3
+; RV64IMZBS-NEXT: bexti a2, a0, 11
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 11
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 336(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 12
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 12
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 13
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 13
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 352(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 14
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 14
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 368(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 15
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 15
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 392(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 16
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 16
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 288(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 17
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 17
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 18
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 18
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 19
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 19
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 320(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 20
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 20
+; RV64IMZBS-NEXT: and a2, a2, a3
; RV64IMZBS-NEXT: sd a2, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 912(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 984(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 240(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
+; RV64IMZBS-NEXT: bexti a2, a0, 21
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 21
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 360(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 22
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 22
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 232(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 23
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 23
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 24
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 24
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 25
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 25
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 26
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 26
+; RV64IMZBS-NEXT: and a2, a2, a3
; RV64IMZBS-NEXT: sd a2, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 232(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
+; RV64IMZBS-NEXT: bexti a2, a0, 27
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 27
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 296(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 28
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 28
+; RV64IMZBS-NEXT: and a2, a2, a3
; RV64IMZBS-NEXT: sd a2, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 224(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 896(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 952(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 1000(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s10, a1, a2
-; RV64IMZBS-NEXT: ld a2, 1008(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s8, a1, a2
-; RV64IMZBS-NEXT: ld a2, 216(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 992(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 208(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 856(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 976(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 904(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 968(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 976(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 960(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 1008(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 200(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s6, a1, a2
-; RV64IMZBS-NEXT: ld a2, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s5, a1, a2
-; RV64IMZBS-NEXT: ld a2, 192(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 184(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 872(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 176(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 928(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 168(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 968(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 992(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s4, a1, a2
-; RV64IMZBS-NEXT: ld a2, 136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s2, a1, a2
-; RV64IMZBS-NEXT: ld a2, 120(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
+; RV64IMZBS-NEXT: bexti a2, a0, 29
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 29
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 168(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 30
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 30
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 152(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sraiw a2, t3, 31
+; RV64IMZBS-NEXT: seqz a2, a2
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 31
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 192(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 32
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 32
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 33
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 33
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 34
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 34
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 224(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 35
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 35
+; RV64IMZBS-NEXT: and a2, a2, a3
; RV64IMZBS-NEXT: sd a2, 240(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 96(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a2, a1, s1
-; RV64IMZBS-NEXT: sd a2, 888(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a0, a1, a0
-; RV64IMZBS-NEXT: sd a0, 920(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a0, a1, s9
-; RV64IMZBS-NEXT: sd a0, 944(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a0, a1, a5
-; RV64IMZBS-NEXT: sd a0, 960(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul s3, a1, s3
-; RV64IMZBS-NEXT: mul s1, a1, s0
-; RV64IMZBS-NEXT: mul s7, a1, a7
-; RV64IMZBS-NEXT: mul a0, a1, t2
-; RV64IMZBS-NEXT: sd a0, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 832(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 816(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, a0, s0
-; RV64IMZBS-NEXT: ld a0, 808(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 792(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s9, a0, a1
-; RV64IMZBS-NEXT: ld a0, 800(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 784(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: bexti a2, a0, 36
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 36
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 37
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 37
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 112(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 38
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 38
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 88(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 39
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 39
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 120(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 40
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 40
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 128(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 41
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 41
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 42
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 42
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 144(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 43
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 43
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 160(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 44
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 44
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 45
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 45
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 184(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 46
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 46
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 47
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 47
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 48
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 48
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 49
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 49
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 48(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 50
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 50
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 56(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 51
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 51
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 64(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 52
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli s0, t0, 52
+; RV64IMZBS-NEXT: and a2, a2, s0
+; RV64IMZBS-NEXT: sd a2, 72(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 53
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 53
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 96(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 54
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 54
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 104(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 55
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli s1, t0, 55
+; RV64IMZBS-NEXT: and a2, a2, s1
+; RV64IMZBS-NEXT: sd a2, 80(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 56
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 56
+; RV64IMZBS-NEXT: and s7, a2, a3
+; RV64IMZBS-NEXT: bexti a2, a0, 57
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a4, t0, 57
+; RV64IMZBS-NEXT: and s6, a2, a4
+; RV64IMZBS-NEXT: bexti a4, a0, 58
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: slli a5, t0, 58
+; RV64IMZBS-NEXT: and s8, a4, a5
+; RV64IMZBS-NEXT: bexti a5, a0, 59
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: slli a6, t0, 59
+; RV64IMZBS-NEXT: and s9, a5, a6
+; RV64IMZBS-NEXT: bexti a6, a0, 60
+; RV64IMZBS-NEXT: addi a6, a6, -1
+; RV64IMZBS-NEXT: slli a7, t0, 60
+; RV64IMZBS-NEXT: and s10, a6, a7
+; RV64IMZBS-NEXT: bexti a7, a0, 61
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: slli a1, t0, 61
+; RV64IMZBS-NEXT: and s11, a7, a1
+; RV64IMZBS-NEXT: bexti a0, a0, 62
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a1, t0, 62
+; RV64IMZBS-NEXT: and a0, a0, a1
+; RV64IMZBS-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a1, t3, 1
+; RV64IMZBS-NEXT: seqz a1, a1
+; RV64IMZBS-NEXT: addi a1, a1, -1
+; RV64IMZBS-NEXT: and a0, a1, t0
+; RV64IMZBS-NEXT: slli t0, t0, 63
+; RV64IMZBS-NEXT: srli a1, t3, 63
+; RV64IMZBS-NEXT: seqz a1, a1
+; RV64IMZBS-NEXT: addi a1, a1, -1
+; RV64IMZBS-NEXT: and a1, a1, t0
+; RV64IMZBS-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: ld a1, 912(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s5, t4, a1
+; RV64IMZBS-NEXT: ld a1, 904(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 896(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, a1, a2
+; RV64IMZBS-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, a1, s4
+; RV64IMZBS-NEXT: xor t5, s2, t5
+; RV64IMZBS-NEXT: xor t6, t1, t2
+; RV64IMZBS-NEXT: ld a1, 768(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s3, a1
+; RV64IMZBS-NEXT: ld a1, 728(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 720(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, a1, s1
+; RV64IMZBS-NEXT: ld a1, 664(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 656(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, a1, a2
+; RV64IMZBS-NEXT: ld a1, 592(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 576(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s3, a1, a2
+; RV64IMZBS-NEXT: ld a1, 536(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 528(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s4, a1, a2
+; RV64IMZBS-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 464(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, a1, a2
+; RV64IMZBS-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t2, a0, a1
-; RV64IMZBS-NEXT: ld a0, 512(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 504(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, a0, a1
-; RV64IMZBS-NEXT: ld a0, 496(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t0, a0
-; RV64IMZBS-NEXT: xor t5, t5, t6
-; RV64IMZBS-NEXT: ld a0, 472(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 464(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, a0, a1
; RV64IMZBS-NEXT: ld a0, 448(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a5, 440(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a0, a5
-; RV64IMZBS-NEXT: ld a0, 424(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a0, a1
-; RV64IMZBS-NEXT: ld a0, 408(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a0, a1
-; RV64IMZBS-NEXT: ld a0, 392(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, a0, a1
-; RV64IMZBS-NEXT: ld a0, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, a0, a1
-; RV64IMZBS-NEXT: ld a0, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a2, 80(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 408(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a1, a2
-; RV64IMZBS-NEXT: ld a2, 72(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a3, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 384(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a3, 376(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 56(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a4, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a3, 336(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 328(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a3, a4
-; RV64IMZBS-NEXT: ld a4, 40(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld ra, 32(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, ra
+; RV64IMZBS-NEXT: ld a4, 288(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a5, 272(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, a5
+; RV64IMZBS-NEXT: ld a5, 232(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a6, 216(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, a6
+; RV64IMZBS-NEXT: ld a6, 168(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a7, 152(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a6, a7
+; RV64IMZBS-NEXT: ld a7, 112(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t0, 88(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, a7, t0
+; RV64IMZBS-NEXT: ld t0, 32(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 16(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor ra, ra, s11
-; RV64IMZBS-NEXT: xor s8, s10, s8
-; RV64IMZBS-NEXT: xor s5, s6, s5
-; RV64IMZBS-NEXT: xor s2, s4, s2
-; RV64IMZBS-NEXT: xor s1, s3, s1
-; RV64IMZBS-NEXT: xor s0, s0, s9
-; RV64IMZBS-NEXT: ld s3, 824(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, t2, s3
-; RV64IMZBS-NEXT: ld s3, 576(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t3, s3
-; RV64IMZBS-NEXT: ld s3, 560(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t4, s3
-; RV64IMZBS-NEXT: ld s3, 552(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, t5, s3
-; RV64IMZBS-NEXT: ld s3, 536(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, t6, s3
-; RV64IMZBS-NEXT: ld s3, 520(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, s3
-; RV64IMZBS-NEXT: ld s3, 480(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, s3
-; RV64IMZBS-NEXT: ld s3, 456(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, s3
-; RV64IMZBS-NEXT: ld s3, 432(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, s3
-; RV64IMZBS-NEXT: xor t1, t1, a0
-; RV64IMZBS-NEXT: ld a0, 160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, a1, a0
-; RV64IMZBS-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, a0
-; RV64IMZBS-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, a0
-; RV64IMZBS-NEXT: ld a0, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, a0
-; RV64IMZBS-NEXT: ld a0, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s3, ra, a0
-; RV64IMZBS-NEXT: ld a0, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s4, s8, a0
-; RV64IMZBS-NEXT: ld a0, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s5, s5, a0
-; RV64IMZBS-NEXT: ld a0, 240(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s2, s2, a0
-; RV64IMZBS-NEXT: xor s1, s1, s7
-; RV64IMZBS-NEXT: xor t2, s0, t2
-; RV64IMZBS-NEXT: ld a0, 640(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t3, a0
-; RV64IMZBS-NEXT: ld a0, 624(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t4, a0
-; RV64IMZBS-NEXT: ld a0, 608(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, t5, a0
-; RV64IMZBS-NEXT: ld a0, 592(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, t6, a0
-; RV64IMZBS-NEXT: ld a0, 568(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, a0
-; RV64IMZBS-NEXT: ld a0, 544(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, a0
-; RV64IMZBS-NEXT: ld a0, 528(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, a0
-; RV64IMZBS-NEXT: ld a0, 488(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a0, t0, a0
-; RV64IMZBS-NEXT: xor t0, t1, a1
-; RV64IMZBS-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, a1
-; RV64IMZBS-NEXT: ld a1, 864(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, a1
-; RV64IMZBS-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, a1
-; RV64IMZBS-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, s3, a1
-; RV64IMZBS-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s4, a1
-; RV64IMZBS-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s3, s5, a1
-; RV64IMZBS-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s2, s2, a1
-; RV64IMZBS-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, s1, a1
-; RV64IMZBS-NEXT: xor t2, t2, t3
-; RV64IMZBS-NEXT: ld t3, 704(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t4, t3
-; RV64IMZBS-NEXT: ld t4, 672(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, ra
+; RV64IMZBS-NEXT: xor s6, s7, s6
+; RV64IMZBS-NEXT: xor t3, s5, t3
+; RV64IMZBS-NEXT: ld s5, 888(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t4, s5
+; RV64IMZBS-NEXT: ld s5, 848(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, t5, s5
+; RV64IMZBS-NEXT: ld s5, 816(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, t6, s5
+; RV64IMZBS-NEXT: ld s5, 792(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s0, s5
+; RV64IMZBS-NEXT: ld s5, 752(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s1, s5
+; RV64IMZBS-NEXT: ld s5, 688(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, s2, s5
+; RV64IMZBS-NEXT: ld s5, 624(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s3, s3, s5
+; RV64IMZBS-NEXT: ld s5, 544(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s4, s4, s5
+; RV64IMZBS-NEXT: ld s5, 480(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s5
+; RV64IMZBS-NEXT: xor a0, t2, a0
+; RV64IMZBS-NEXT: ld t2, 432(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a1, t2
+; RV64IMZBS-NEXT: ld t2, 400(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a2, t2
+; RV64IMZBS-NEXT: ld t2, 352(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a3, t2
+; RV64IMZBS-NEXT: ld t2, 304(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, t2
+; RV64IMZBS-NEXT: ld t2, 256(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, t2
+; RV64IMZBS-NEXT: ld t2, 192(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a6, t2
+; RV64IMZBS-NEXT: ld t2, 120(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, a7, t2
+; RV64IMZBS-NEXT: ld t2, 40(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, t2
+; RV64IMZBS-NEXT: xor t2, s6, s8
+; RV64IMZBS-NEXT: xor t3, t3, t4
+; RV64IMZBS-NEXT: ld t4, 864(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t4, t5, t4
-; RV64IMZBS-NEXT: ld t5, 656(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t5, 840(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t5, t6, t5
-; RV64IMZBS-NEXT: ld t6, 632(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t6
-; RV64IMZBS-NEXT: ld t6, 600(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t6
-; RV64IMZBS-NEXT: ld t6, 584(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t6
-; RV64IMZBS-NEXT: xor a2, t0, a2
-; RV64IMZBS-NEXT: ld t0, 936(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, t0
-; RV64IMZBS-NEXT: ld t0, 912(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, t0
-; RV64IMZBS-NEXT: ld t0, 896(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: ld t1, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, s0, t1
-; RV64IMZBS-NEXT: ld t6, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, s3, t6
-; RV64IMZBS-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s2, s0
-; RV64IMZBS-NEXT: xor t2, t2, t3
-; RV64IMZBS-NEXT: ld t3, 744(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t4, t3
-; RV64IMZBS-NEXT: ld t4, 712(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t5, t4
-; RV64IMZBS-NEXT: ld t5, 680(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t5
-; RV64IMZBS-NEXT: ld t5, 648(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t5
-; RV64IMZBS-NEXT: ld t5, 616(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t5
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 984(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t6, 824(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, s0, t6
+; RV64IMZBS-NEXT: ld s0, 776(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s1, s0
+; RV64IMZBS-NEXT: ld s1, 704(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s2, s1
+; RV64IMZBS-NEXT: ld s2, 632(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, s3, s2
+; RV64IMZBS-NEXT: ld s3, 552(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s3, s4, s3
+; RV64IMZBS-NEXT: ld s4, 488(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s4
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 368(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 320(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: ld a4, 952(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, t0, a4
-; RV64IMZBS-NEXT: ld t0, 904(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: ld t1, 872(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t6, t1
-; RV64IMZBS-NEXT: ld t5, 336(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 264(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 200(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: ld a6, 128(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: ld a7, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t0, a7
+; RV64IMZBS-NEXT: xor t0, t2, s9
+; RV64IMZBS-NEXT: xor t2, t3, t4
+; RV64IMZBS-NEXT: ld t3, 872(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t5, t3
+; RV64IMZBS-NEXT: ld t4, 832(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t6, t4
+; RV64IMZBS-NEXT: ld t5, 784(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t5, s0, t5
-; RV64IMZBS-NEXT: xor t2, t2, t3
-; RV64IMZBS-NEXT: ld t3, 760(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t4, t3
-; RV64IMZBS-NEXT: ld t4, 728(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t4
-; RV64IMZBS-NEXT: ld t4, 696(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t4
-; RV64IMZBS-NEXT: ld t4, 664(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t4
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 1000(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t6, 712(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, s1, t6
+; RV64IMZBS-NEXT: ld s0, 640(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s2, s0
+; RV64IMZBS-NEXT: ld s1, 560(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s3, s1
+; RV64IMZBS-NEXT: ld s2, 496(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s2
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 344(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 280(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: ld a4, 976(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, t0, a4
-; RV64IMZBS-NEXT: ld t0, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: ld t1, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t5, t1
-; RV64IMZBS-NEXT: xor t2, t2, t3
-; RV64IMZBS-NEXT: ld t3, 776(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t3
-; RV64IMZBS-NEXT: ld t3, 736(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t3
-; RV64IMZBS-NEXT: ld t3, 688(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t3
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 1008(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 208(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 136(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: ld a6, 56(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: xor a7, t0, s10
+; RV64IMZBS-NEXT: xor t0, t2, t3
+; RV64IMZBS-NEXT: ld t2, 856(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t2, t4, t2
+; RV64IMZBS-NEXT: ld t3, 800(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t5, t3
+; RV64IMZBS-NEXT: ld t4, 736(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t6, t4
+; RV64IMZBS-NEXT: ld t5, 648(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, s0, t5
+; RV64IMZBS-NEXT: ld t6, 568(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, s1, t6
+; RV64IMZBS-NEXT: ld s0, 504(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s0
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 296(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 224(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: ld a4, 968(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, t0, a4
-; RV64IMZBS-NEXT: ld t0, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: xor a5, t2, a5
-; RV64IMZBS-NEXT: ld t1, 768(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t1
-; RV64IMZBS-NEXT: ld t1, 720(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t1
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 992(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 144(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: xor a6, a7, s11
+; RV64IMZBS-NEXT: xor a7, t0, t2
+; RV64IMZBS-NEXT: ld t0, 808(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t3, t0
+; RV64IMZBS-NEXT: ld t2, 744(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t2, t4, t2
+; RV64IMZBS-NEXT: ld t3, 672(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t5, t3
+; RV64IMZBS-NEXT: ld t4, 584(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t6, t4
+; RV64IMZBS-NEXT: ld t5, 512(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, t5
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 240(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 160(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, t0, a4
-; RV64IMZBS-NEXT: xor a5, a5, a6
-; RV64IMZBS-NEXT: ld a6, 752(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a7, a6
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 960(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 72(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: xor a6, a7, t0
+; RV64IMZBS-NEXT: ld a7, 760(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t2, a7
+; RV64IMZBS-NEXT: ld t0, 680(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t3, t0
+; RV64IMZBS-NEXT: ld t2, 600(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t2, t4, t2
+; RV64IMZBS-NEXT: ld t3, 520(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, t3
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 176(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 96(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: xor a4, a5, a6
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: xor a0, a4, a0
+; RV64IMZBS-NEXT: ld a4, 8(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: xor a5, a6, a7
+; RV64IMZBS-NEXT: ld a6, 696(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, t0, a6
+; RV64IMZBS-NEXT: ld a7, 616(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t2, a7
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 104(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: xor a3, a5, a6
+; RV64IMZBS-NEXT: ld a5, 608(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a7, a5
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a2, a1
-; RV64IMZBS-NEXT: ld a2, 840(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: sd a0, 0(a2)
-; RV64IMZBS-NEXT: sd a1, 8(a2)
-; RV64IMZBS-NEXT: ld a2, 848(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: sd a0, 0(a2)
-; RV64IMZBS-NEXT: sd a1, 8(a2)
-; RV64IMZBS-NEXT: ld ra, 1112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 1104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 1096(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s2, 1088(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s3, 1080(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s4, 1072(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s5, 1064(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s6, 1056(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s7, 1048(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s8, 1040(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s9, 1032(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s10, 1024(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 1016(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: addi sp, sp, 1120
+; RV64IMZBS-NEXT: xor a3, a3, a5
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: xor a1, a3, t1
+; RV64IMZBS-NEXT: xor a0, a0, a4
+; RV64IMZBS-NEXT: ld a2, 920(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: sd a1, 0(a2)
+; RV64IMZBS-NEXT: sd a0, 8(a2)
+; RV64IMZBS-NEXT: ld a2, 928(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: sd a1, 0(a2)
+; RV64IMZBS-NEXT: sd a0, 8(a2)
+; RV64IMZBS-NEXT: ld ra, 1032(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 1024(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 1016(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 1008(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 1000(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s4, 992(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s5, 984(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s6, 976(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s7, 968(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s8, 960(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s9, 952(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s10, 944(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s11, 936(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 1040
; RV64IMZBS-NEXT: ret
%xy = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %x, <2 x i64> %y)
%yx = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %y, <2 x i64> %x)
@@ -18607,5550 +20098,5609 @@ define void @mul_use_commutative_clmul_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0
;
; RV32IM-LABEL: mul_use_commutative_clmul_v2i64:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -816
-; RV32IM-NEXT: sw ra, 812(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s0, 808(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s1, 804(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s2, 800(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s3, 796(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s4, 792(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s5, 788(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s6, 784(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s7, 780(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s8, 776(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s9, 772(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s10, 768(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s11, 764(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw a3, 688(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw a2, 684(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 0(a1)
-; RV32IM-NEXT: lw a5, 4(a1)
-; RV32IM-NEXT: lw a2, 8(a1)
-; RV32IM-NEXT: sw a2, 724(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 12(a1)
-; RV32IM-NEXT: sw a1, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 0(a0)
-; RV32IM-NEXT: lw a2, 4(a0)
-; RV32IM-NEXT: lw s10, 8(a0)
-; RV32IM-NEXT: lw a0, 12(a0)
-; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 16
+; RV32IM-NEXT: addi sp, sp, -704
+; RV32IM-NEXT: sw ra, 700(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s0, 696(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 692(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s2, 688(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s3, 684(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s4, 680(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s5, 676(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s6, 672(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s7, 668(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s8, 664(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s9, 660(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s10, 656(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s11, 652(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a3, 620(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s2, 0(a1)
+; RV32IM-NEXT: lw t4, 4(a1)
+; RV32IM-NEXT: lw t5, 8(a1)
+; RV32IM-NEXT: lw t3, 12(a1)
+; RV32IM-NEXT: lw t6, 0(a0)
+; RV32IM-NEXT: lw s1, 4(a0)
+; RV32IM-NEXT: lw t2, 8(a0)
+; RV32IM-NEXT: lw s6, 12(a0)
+; RV32IM-NEXT: lui a1, 16
; RV32IM-NEXT: li a0, 1
-; RV32IM-NEXT: lui s6, 1
-; RV32IM-NEXT: addi t0, a3, -256
-; RV32IM-NEXT: sw t0, 732(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a6, a1, 8
-; RV32IM-NEXT: srli a7, a1, 24
-; RV32IM-NEXT: and a6, a6, t0
-; RV32IM-NEXT: or a6, a6, a7
-; RV32IM-NEXT: sw a6, 680(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli a0, a0, 11
-; RV32IM-NEXT: andi a3, a5, 2
-; RV32IM-NEXT: sw a3, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 1
-; RV32IM-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 4
-; RV32IM-NEXT: sw a3, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 8
-; RV32IM-NEXT: sw a3, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 16
-; RV32IM-NEXT: sw a3, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 32
-; RV32IM-NEXT: sw a3, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 64
-; RV32IM-NEXT: sw a3, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 128
-; RV32IM-NEXT: sw a3, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 256
-; RV32IM-NEXT: sw a3, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 512
-; RV32IM-NEXT: sw a3, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a5, 1024
-; RV32IM-NEXT: sw a3, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw a0, 728(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a5, a0
-; RV32IM-NEXT: sw a3, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a5, s6
-; RV32IM-NEXT: sw a3, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s4, 2
-; RV32IM-NEXT: and a3, a5, s4
-; RV32IM-NEXT: sw a3, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s3, 4
-; RV32IM-NEXT: and a3, a5, s3
-; RV32IM-NEXT: sw a3, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s2, 8
-; RV32IM-NEXT: and a3, a5, s2
-; RV32IM-NEXT: sw a3, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t6, 16
-; RV32IM-NEXT: and a3, a5, t6
-; RV32IM-NEXT: sw a3, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s1, 32
-; RV32IM-NEXT: and a3, a5, s1
-; RV32IM-NEXT: sw a3, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t5, 64
-; RV32IM-NEXT: and a3, a5, t5
-; RV32IM-NEXT: sw a3, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t4, 128
-; RV32IM-NEXT: and a3, a5, t4
-; RV32IM-NEXT: sw a3, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t3, 256
-; RV32IM-NEXT: and a3, a5, t3
-; RV32IM-NEXT: sw a3, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t1, 512
-; RV32IM-NEXT: and a3, a5, t1
-; RV32IM-NEXT: sw a3, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a6, 1024
-; RV32IM-NEXT: and a3, a5, a6
-; RV32IM-NEXT: sw a3, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a7, 2048
-; RV32IM-NEXT: and a3, a5, a7
-; RV32IM-NEXT: sw a3, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 4096
-; RV32IM-NEXT: and a3, a5, a3
-; RV32IM-NEXT: sw a3, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s11, 8192
-; RV32IM-NEXT: and a3, a5, s11
-; RV32IM-NEXT: sw a3, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s0, 16384
-; RV32IM-NEXT: and a3, a5, s0
-; RV32IM-NEXT: sw a3, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t2, 32768
-; RV32IM-NEXT: and a3, a5, t2
-; RV32IM-NEXT: sw a3, 372(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t0, 65536
-; RV32IM-NEXT: and a3, a5, t0
-; RV32IM-NEXT: sw a3, 368(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 131072
-; RV32IM-NEXT: and a3, a5, a3
-; RV32IM-NEXT: sw a3, 364(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui ra, 262144
-; RV32IM-NEXT: and a3, a5, ra
-; RV32IM-NEXT: sw a3, 360(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 524288
-; RV32IM-NEXT: and a5, a5, a3
-; RV32IM-NEXT: sw a5, 356(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 2
-; RV32IM-NEXT: sw a5, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 1
-; RV32IM-NEXT: sw a5, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 4
-; RV32IM-NEXT: sw a5, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 8
-; RV32IM-NEXT: sw a5, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 16
-; RV32IM-NEXT: sw a5, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 32
-; RV32IM-NEXT: sw a5, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 64
-; RV32IM-NEXT: sw a5, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s9, a4, 128
-; RV32IM-NEXT: sw s9, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a4, 256
-; RV32IM-NEXT: sw a5, 352(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s8, a4, 512
-; RV32IM-NEXT: sw s8, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s7, a4, 1024
-; RV32IM-NEXT: sw s7, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s5, a4, a0
-; RV32IM-NEXT: sw s5, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s6, a4, s6
-; RV32IM-NEXT: and s4, a4, s4
-; RV32IM-NEXT: sw s4, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a4, s3
-; RV32IM-NEXT: sw s3, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s2, a4, s2
-; RV32IM-NEXT: sw s2, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t6, a4, t6
-; RV32IM-NEXT: sw t6, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s1, a4, s1
-; RV32IM-NEXT: and t5, a4, t5
-; RV32IM-NEXT: sw t5, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t4, a4, t4
-; RV32IM-NEXT: sw t4, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t3, a4, t3
-; RV32IM-NEXT: sw t3, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t1, a4, t1
-; RV32IM-NEXT: sw t1, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a6, a4, a6
-; RV32IM-NEXT: sw a6, 464(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a7, a4, a7
-; RV32IM-NEXT: lui a5, 4096
-; RV32IM-NEXT: and a5, a4, a5
-; RV32IM-NEXT: sw a5, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s11, a4, s11
-; RV32IM-NEXT: sw s11, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s0, a4, s0
-; RV32IM-NEXT: and t2, a4, t2
-; RV32IM-NEXT: and t0, a4, t0
-; RV32IM-NEXT: lui a0, 131072
-; RV32IM-NEXT: and a0, a4, a0
-; RV32IM-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and ra, a4, ra
-; RV32IM-NEXT: and a3, a4, a3
-; RV32IM-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
+; RV32IM-NEXT: lui s4, 1
+; RV32IM-NEXT: lui s5, 64
+; RV32IM-NEXT: lui ra, 128
+; RV32IM-NEXT: addi t1, a1, -256
+; RV32IM-NEXT: slli s0, a0, 11
+; RV32IM-NEXT: srli a5, t6, 8
+; RV32IM-NEXT: srli a6, t6, 24
+; RV32IM-NEXT: and a3, t6, t1
+; RV32IM-NEXT: slli a0, t6, 24
+; RV32IM-NEXT: srli a4, s2, 8
+; RV32IM-NEXT: srli a1, s2, 24
+; RV32IM-NEXT: and a7, s2, t1
+; RV32IM-NEXT: slli a2, s2, 24
+; RV32IM-NEXT: and s3, s2, s0
+; RV32IM-NEXT: sw s3, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, s2, s4
+; RV32IM-NEXT: and s7, s2, s5
+; RV32IM-NEXT: sw s7, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: or a5, a5, a6
+; RV32IM-NEXT: sw a5, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a6, s2, ra
+; RV32IM-NEXT: slli a3, a3, 8
+; RV32IM-NEXT: and a4, a4, t1
+; RV32IM-NEXT: slli a7, a7, 8
+; RV32IM-NEXT: or a0, a0, a3
; RV32IM-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s9
-; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s9, 352(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, s9
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s8
-; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s7
-; RV32IM-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s5
-; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s6
-; RV32IM-NEXT: mv s5, s6
-; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s4
+; RV32IM-NEXT: mul a0, s1, s3
+; RV32IM-NEXT: or a1, a4, a1
+; RV32IM-NEXT: sw a1, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, s1, t0
+; RV32IM-NEXT: or a2, a2, a7
+; RV32IM-NEXT: sw a2, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s1, s7
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s3
-; RV32IM-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s2
-; RV32IM-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t6
-; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s1
-; RV32IM-NEXT: mv t6, s1
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t5
-; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t4
-; RV32IM-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t3
-; RV32IM-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t1
-; RV32IM-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, a6
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, a7
-; RV32IM-NEXT: mv a6, a7
-; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, a5
+; RV32IM-NEXT: mul a0, s1, a6
+; RV32IM-NEXT: xor a0, a2, a0
+; RV32IM-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 32768
+; RV32IM-NEXT: lui a2, 16384
+; RV32IM-NEXT: and a1, s2, a2
+; RV32IM-NEXT: sw a1, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s8, s2, a0
+; RV32IM-NEXT: lui s3, 32768
+; RV32IM-NEXT: mul a0, s1, a1
+; RV32IM-NEXT: mul a1, s1, s8
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s11
-; RV32IM-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s0
-; RV32IM-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t2
-; RV32IM-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t0
-; RV32IM-NEXT: sw a0, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a5, a2, a0
-; RV32IM-NEXT: mul t3, a2, ra
-; RV32IM-NEXT: mv a7, ra
-; RV32IM-NEXT: mul a0, a2, a3
-; RV32IM-NEXT: mv a2, a3
-; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a1, a0
-; RV32IM-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t4, a1, a0
-; RV32IM-NEXT: lw a0, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t5, a1, a0
-; RV32IM-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s1, a1, a0
-; RV32IM-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s2, a1, a0
-; RV32IM-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s3, a1, a0
-; RV32IM-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s4, a1, a0
-; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s6, a1, a0
-; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: and a0, t4, s0
+; RV32IM-NEXT: mul a0, t6, a0
+; RV32IM-NEXT: and a1, t4, s4
+; RV32IM-NEXT: mul a1, t6, a1
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s7, a1, a0
-; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s8, a1, a0
-; RV32IM-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: and a0, t4, s5
+; RV32IM-NEXT: mul a0, t6, a0
+; RV32IM-NEXT: and a1, t4, ra
+; RV32IM-NEXT: mul a1, t6, a1
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t1, a1, a0
-; RV32IM-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s11, a1, a0
-; RV32IM-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: and a0, t4, a2
+; RV32IM-NEXT: mul a0, t6, a0
+; RV32IM-NEXT: and a1, t4, s3
+; RV32IM-NEXT: mul a1, t6, a1
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul ra, a1, a0
-; RV32IM-NEXT: lw a0, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
+; RV32IM-NEXT: srli a0, t2, 8
+; RV32IM-NEXT: and a0, a0, t1
+; RV32IM-NEXT: srli a1, t2, 24
+; RV32IM-NEXT: or a0, a0, a1
; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 380(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 368(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 364(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 360(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s9
-; RV32IM-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s5
-; RV32IM-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t6
-; RV32IM-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a6
-; RV32IM-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s0
-; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t2
-; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t0
-; RV32IM-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a7
-; RV32IM-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a2
-; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 732(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a0, a1, a2
-; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: and a0, t2, t1
+; RV32IM-NEXT: sw t1, 612(sp) # 4-byte Folded Spill
; RV32IM-NEXT: slli a0, a0, 8
+; RV32IM-NEXT: slli a1, t2, 24
; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a0, a4, 8
-; RV32IM-NEXT: and a0, a0, a2
-; RV32IM-NEXT: srli a1, a4, 24
+; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli a0, t5, 8
+; RV32IM-NEXT: and a0, a0, t1
+; RV32IM-NEXT: srli a1, t5, 24
; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, a4, a2
-; RV32IM-NEXT: slli a4, a4, 24
+; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, t5, t1
; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or a0, a4, a0
-; RV32IM-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: slli a1, t5, 24
+; RV32IM-NEXT: or t1, a1, a0
+; RV32IM-NEXT: mv a3, s0
+; RV32IM-NEXT: sw s0, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s9, t5, s0
+; RV32IM-NEXT: lui a7, 1
+; RV32IM-NEXT: and s4, t5, a7
+; RV32IM-NEXT: mul a0, s6, s9
+; RV32IM-NEXT: mul a1, s6, s4
+; RV32IM-NEXT: xor s11, a0, a1
+; RV32IM-NEXT: lui a5, 64
+; RV32IM-NEXT: and s10, t5, a5
+; RV32IM-NEXT: and a1, t5, ra
+; RV32IM-NEXT: mul a0, s6, s10
+; RV32IM-NEXT: mul a2, s6, a1
+; RV32IM-NEXT: xor s7, a0, a2
+; RV32IM-NEXT: lui a4, 16384
+; RV32IM-NEXT: and s5, t5, a4
+; RV32IM-NEXT: and a0, t5, s3
+; RV32IM-NEXT: mul a2, s6, s5
+; RV32IM-NEXT: mul s0, s6, a0
+; RV32IM-NEXT: xor s0, a2, s0
+; RV32IM-NEXT: and a2, t3, a3
+; RV32IM-NEXT: mul a2, t2, a2
+; RV32IM-NEXT: and a3, t3, a7
+; RV32IM-NEXT: mul a3, t2, a3
+; RV32IM-NEXT: xor a3, a2, a3
+; RV32IM-NEXT: and a2, t3, a5
+; RV32IM-NEXT: mul a2, t2, a2
+; RV32IM-NEXT: and ra, t3, ra
+; RV32IM-NEXT: mul ra, t2, ra
+; RV32IM-NEXT: xor a2, a2, ra
+; RV32IM-NEXT: and ra, t3, a4
+; RV32IM-NEXT: mul ra, t2, ra
+; RV32IM-NEXT: and s3, t3, s3
+; RV32IM-NEXT: mul s3, t2, s3
+; RV32IM-NEXT: xor a7, ra, s3
+; RV32IM-NEXT: lw a4, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul s3, t6, a4
+; RV32IM-NEXT: mul t0, t6, t0
+; RV32IM-NEXT: xor a4, s3, t0
+; RV32IM-NEXT: sw a4, 368(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a4, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul t0, t6, a4
+; RV32IM-NEXT: mul a6, t6, a6
+; RV32IM-NEXT: xor a4, t0, a6
+; RV32IM-NEXT: sw a4, 352(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a4, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a5, t6, a4
+; RV32IM-NEXT: mul a4, t6, s8
+; RV32IM-NEXT: xor a4, a5, a4
+; RV32IM-NEXT: sw a4, 320(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, t2, s9
+; RV32IM-NEXT: mul a5, t2, s4
+; RV32IM-NEXT: xor a4, a4, a5
+; RV32IM-NEXT: sw a4, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, t2, s10
+; RV32IM-NEXT: mul a1, t2, a1
+; RV32IM-NEXT: xor a1, a4, a1
+; RV32IM-NEXT: sw a1, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, t2, s5
+; RV32IM-NEXT: mul a0, t2, a0
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a0, a1, a0
+; RV32IM-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a0, a1, a0
+; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 2
+; RV32IM-NEXT: and a0, s2, a0
+; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, a0
+; RV32IM-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 256
+; RV32IM-NEXT: and a0, s2, a0
+; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t0, 256
+; RV32IM-NEXT: mul a0, s1, a0
+; RV32IM-NEXT: lw a5, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a5, a0
+; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a6, 65536
+; RV32IM-NEXT: and a0, s2, a6
+; RV32IM-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s1, a0
+; RV32IM-NEXT: lw s3, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, s3, a5
+; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s3, 2
+; RV32IM-NEXT: and a5, t4, s3
+; RV32IM-NEXT: mul a5, t6, a5
+; RV32IM-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a5
+; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, t4, t0
+; RV32IM-NEXT: mul a5, t6, a5
+; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a5
+; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, t4, a6
+; RV32IM-NEXT: lui t0, 65536
+; RV32IM-NEXT: mul a5, t6, a5
+; RV32IM-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a5
+; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a0, a1, a0
+; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, a5, t3
-; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t4, a3
+; RV32IM-NEXT: or a0, t1, a0
+; RV32IM-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, t5, s3
+; RV32IM-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s6, a0
+; RV32IM-NEXT: xor a0, s11, a5
+; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 256
+; RV32IM-NEXT: and a0, t5, a5
+; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, s6, a0
+; RV32IM-NEXT: xor a0, s7, a6
+; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, t5, t0
+; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, s6, a0
+; RV32IM-NEXT: mv ra, s6
+; RV32IM-NEXT: xor a0, s0, a6
+; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mv t1, t3
+; RV32IM-NEXT: and a6, t3, s3
+; RV32IM-NEXT: mul a6, t2, a6
+; RV32IM-NEXT: xor a0, a3, a6
+; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, t3, a5
+; RV32IM-NEXT: mul a3, t2, a3
+; RV32IM-NEXT: xor a2, a2, a3
+; RV32IM-NEXT: sw a2, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, t3, t0
+; RV32IM-NEXT: mul a2, t2, a2
+; RV32IM-NEXT: xor a0, a7, a2
; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t5, s1
-; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, s2, s3
-; RV32IM-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, s4, s6
+; RV32IM-NEXT: andi a2, s2, 2
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t4, 2
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 1
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, t6, 1
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 4
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t4, 4
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 2
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, t6, 2
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 8
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t4, 8
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 3
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, t6, 3
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 16
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t4, 16
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 4
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, t6, 4
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 32
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t4, 32
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 5
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, t6, 5
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 64
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t4, 64
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 6
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, t6, 6
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 128
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t4, 128
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 7
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, t6, 7
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 256
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t4, 256
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 8
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, t6, 8
+; RV32IM-NEXT: and a0, a3, a6
; RV32IM-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, s7, s8
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s2, 512
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: andi a3, t4, 512
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: slli a6, s1, 9
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a6, t6, 9
+; RV32IM-NEXT: and a0, a3, a6
+; RV32IM-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a2, a6
+; RV32IM-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s11, 4
+; RV32IM-NEXT: lui a0, 8
+; RV32IM-NEXT: lui a1, 32
+; RV32IM-NEXT: lui a2, 512
+; RV32IM-NEXT: lui a4, 1024
+; RV32IM-NEXT: lui t3, 2048
+; RV32IM-NEXT: lui s6, 4096
+; RV32IM-NEXT: lui s7, 8192
+; RV32IM-NEXT: lui s8, 131072
+; RV32IM-NEXT: lui s9, 262144
+; RV32IM-NEXT: lui s10, 524288
+; RV32IM-NEXT: andi a3, s2, 1
+; RV32IM-NEXT: andi a5, s2, 1024
+; RV32IM-NEXT: sw a5, 344(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a6, s2, s11
+; RV32IM-NEXT: and a7, s2, a0
+; RV32IM-NEXT: lui a5, 16
+; RV32IM-NEXT: and t0, s2, a5
+; RV32IM-NEXT: and s0, s2, a1
+; RV32IM-NEXT: and s3, s2, a2
+; RV32IM-NEXT: and s4, s2, a4
+; RV32IM-NEXT: and s5, s2, t3
+; RV32IM-NEXT: and s6, s2, s6
+; RV32IM-NEXT: and s7, s2, s7
+; RV32IM-NEXT: and s8, s2, s8
+; RV32IM-NEXT: and s9, s2, s9
+; RV32IM-NEXT: and s2, s2, s10
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: andi s10, t4, 1
+; RV32IM-NEXT: seqz s10, s10
+; RV32IM-NEXT: and s11, t4, s11
+; RV32IM-NEXT: mul s11, t6, s11
+; RV32IM-NEXT: sw s11, 280(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, t4, a0
+; RV32IM-NEXT: mul a0, t6, s11
; RV32IM-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t1, s11
-; RV32IM-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, ra, a0
+; RV32IM-NEXT: and s11, t4, a5
+; RV32IM-NEXT: mul a0, t6, s11
+; RV32IM-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, t4, a1
+; RV32IM-NEXT: mul a0, t6, s11
+; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, t4, a2
+; RV32IM-NEXT: mul a0, t6, s11
+; RV32IM-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, t4, a4
+; RV32IM-NEXT: mul a0, t6, s11
; RV32IM-NEXT: sw a0, 316(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a0, s10, 8
-; RV32IM-NEXT: and a0, a0, a2
-; RV32IM-NEXT: srli a1, s10, 24
-; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s5, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: andi a0, s5, 2
-; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 1
-; RV32IM-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 4
-; RV32IM-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 8
-; RV32IM-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 16
-; RV32IM-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 32
-; RV32IM-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 64
-; RV32IM-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 128
-; RV32IM-NEXT: sw a0, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 256
+; RV32IM-NEXT: and s11, t4, t3
+; RV32IM-NEXT: mul a0, t6, s11
+; RV32IM-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 4096
+; RV32IM-NEXT: and s11, t4, a0
+; RV32IM-NEXT: mul a0, t6, s11
+; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 8192
+; RV32IM-NEXT: and s11, t4, a0
+; RV32IM-NEXT: mul a0, t6, s11
+; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 131072
+; RV32IM-NEXT: and s11, t4, a0
+; RV32IM-NEXT: mul a0, t6, s11
; RV32IM-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 512
-; RV32IM-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, s5, 1024
+; RV32IM-NEXT: lui a0, 262144
+; RV32IM-NEXT: and s11, t4, a0
+; RV32IM-NEXT: mul a0, t6, s11
+; RV32IM-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a0, 524288
+; RV32IM-NEXT: and s11, t4, a0
+; RV32IM-NEXT: mul a0, t6, s11
+; RV32IM-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a4, t6, a0
+; RV32IM-NEXT: mul a0, s1, a6
; RV32IM-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a1, s5, a0
-; RV32IM-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 1
-; RV32IM-NEXT: and a1, s5, a3
-; RV32IM-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t1, 2
-; RV32IM-NEXT: and a1, s5, t1
-; RV32IM-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t0, 4
-; RV32IM-NEXT: and a1, s5, t0
-; RV32IM-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t4, 8
-; RV32IM-NEXT: and a1, s5, t4
-; RV32IM-NEXT: sw a1, 196(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s9, 16
-; RV32IM-NEXT: and a1, s5, s9
-; RV32IM-NEXT: sw a1, 188(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t3, 32
-; RV32IM-NEXT: and a1, s5, t3
+; RV32IM-NEXT: mul a0, t6, a6
+; RV32IM-NEXT: sw a0, 276(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, a7
+; RV32IM-NEXT: sw a0, 288(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t6, a7
+; RV32IM-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, t0
+; RV32IM-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t6, t0
+; RV32IM-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, s0
+; RV32IM-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t6, s0
+; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a1, t6, a0
+; RV32IM-NEXT: mul a0, s1, s3
+; RV32IM-NEXT: sw a0, 244(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t6, s3
+; RV32IM-NEXT: sw a0, 264(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, s4
+; RV32IM-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t6, s4
+; RV32IM-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, s5
+; RV32IM-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t6, s5
+; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, s6
+; RV32IM-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t6, s6
+; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s1, s7
+; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t6, s7
+; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a0, t6, a0
+; RV32IM-NEXT: mul a2, s1, s8
+; RV32IM-NEXT: sw a2, 240(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, t6, s8
+; RV32IM-NEXT: sw a2, 256(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s1, s9
+; RV32IM-NEXT: sw a2, 268(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, t6, s9
+; RV32IM-NEXT: sw a2, 292(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s1, s2
+; RV32IM-NEXT: sw a2, 324(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, t6, s2
+; RV32IM-NEXT: sw a2, 372(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: addi s10, s10, -1
+; RV32IM-NEXT: slli a6, s1, 10
+; RV32IM-NEXT: and s1, a3, s1
+; RV32IM-NEXT: sw s1, 220(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, s10, t6
+; RV32IM-NEXT: sw a2, 228(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, a3, t6
+; RV32IM-NEXT: sw a2, 248(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t6, t6, 10
+; RV32IM-NEXT: andi a3, t4, 1024
+; RV32IM-NEXT: lw a2, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and a5, a2, a6
+; RV32IM-NEXT: sw a5, 296(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a3, a3, t6
+; RV32IM-NEXT: sw a3, 304(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, a2, t6
+; RV32IM-NEXT: sw a2, 344(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 368(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a4
+; RV32IM-NEXT: sw a2, 368(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 352(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: sw a1, 352(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 320(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sw a0, 320(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 2
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t1, 2
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 1
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 200(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t2, 1
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 4
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t1, 4
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 2
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 188(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t2, 2
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 216(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 8
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t1, 8
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 3
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 148(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t2, 3
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 164(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 16
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t1, 16
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 4
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t2, 4
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 172(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 32
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t1, 32
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 5
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t2, 5
+; RV32IM-NEXT: and a1, a1, a2
; RV32IM-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t5, 64
-; RV32IM-NEXT: and a1, s5, t5
-; RV32IM-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t6, 128
-; RV32IM-NEXT: and a1, s5, t6
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 64
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t1, 64
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 6
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 168(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t2, 6
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 204(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 128
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t1, 128
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 7
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t2, 7
+; RV32IM-NEXT: and a1, a1, a2
; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s1, 256
-; RV32IM-NEXT: and a1, s5, s1
-; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s7, 512
-; RV32IM-NEXT: and a1, s5, s7
-; RV32IM-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s4, 1024
-; RV32IM-NEXT: and a1, s5, s4
-; RV32IM-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s3, 2048
-; RV32IM-NEXT: and a1, s5, s3
-; RV32IM-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a6, 4096
-; RV32IM-NEXT: and a1, s5, a6
-; RV32IM-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 8192
-; RV32IM-NEXT: and a2, s5, a1
-; RV32IM-NEXT: sw a2, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a7, 16384
-; RV32IM-NEXT: and a2, s5, a7
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 256
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t1, 256
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 8
+; RV32IM-NEXT: and a2, a0, a2
; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a4, 32768
-; RV32IM-NEXT: and a2, s5, a4
-; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 65536
-; RV32IM-NEXT: and a2, s5, a2
-; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s8, 131072
-; RV32IM-NEXT: and a2, s5, s8
-; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 262144
-; RV32IM-NEXT: and a2, s5, a2
-; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 524288
-; RV32IM-NEXT: and a2, s5, a2
-; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s11, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: andi a2, s11, 2
-; RV32IM-NEXT: sw a2, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 1
-; RV32IM-NEXT: sw a2, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 4
-; RV32IM-NEXT: sw a2, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 8
-; RV32IM-NEXT: sw a2, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 16
-; RV32IM-NEXT: sw a2, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 32
-; RV32IM-NEXT: sw a2, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, s11, 64
-; RV32IM-NEXT: sw a2, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s2, s11, 128
-; RV32IM-NEXT: andi a5, s11, 256
-; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi ra, s11, 512
-; RV32IM-NEXT: andi a2, s11, 1024
-; RV32IM-NEXT: and s6, s11, a0
-; RV32IM-NEXT: and a3, s11, a3
-; RV32IM-NEXT: sw a3, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, s11, t1
-; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t2, s11, t0
-; RV32IM-NEXT: and s0, s11, t4
-; RV32IM-NEXT: and t0, s11, s9
-; RV32IM-NEXT: and t4, s11, t3
-; RV32IM-NEXT: and t5, s11, t5
-; RV32IM-NEXT: and t6, s11, t6
-; RV32IM-NEXT: and s1, s11, s1
-; RV32IM-NEXT: and t3, s11, s7
-; RV32IM-NEXT: and a0, s11, s4
-; RV32IM-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s4, s11, s3
-; RV32IM-NEXT: and s3, s11, a6
-; RV32IM-NEXT: and s9, s11, a1
-; RV32IM-NEXT: and a7, s11, a7
-; RV32IM-NEXT: and s7, s11, a4
-; RV32IM-NEXT: lui a1, 65536
-; RV32IM-NEXT: and a6, s11, a1
-; RV32IM-NEXT: and t1, s11, s8
-; RV32IM-NEXT: lui a1, 262144
-; RV32IM-NEXT: and s5, s11, a1
-; RV32IM-NEXT: lui a1, 524288
-; RV32IM-NEXT: and s8, s11, a1
-; RV32IM-NEXT: lw a1, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s11, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, s11
-; RV32IM-NEXT: sw a4, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 216(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 212(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a4
-; RV32IM-NEXT: sw a4, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s2
-; RV32IM-NEXT: sw a4, 208(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a5
-; RV32IM-NEXT: sw a4, 204(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, ra
-; RV32IM-NEXT: sw a4, 292(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a2
-; RV32IM-NEXT: sw a4, 172(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s6
-; RV32IM-NEXT: sw a4, 200(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a3
-; RV32IM-NEXT: sw a4, 192(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a1, a3
-; RV32IM-NEXT: sw a4, 288(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t2
-; RV32IM-NEXT: sw a4, 168(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s0
-; RV32IM-NEXT: sw a4, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t0
-; RV32IM-NEXT: sw a4, 184(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t4
-; RV32IM-NEXT: sw a4, 180(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t5
-; RV32IM-NEXT: sw a4, 280(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t6
-; RV32IM-NEXT: sw a4, 156(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s1
-; RV32IM-NEXT: sw a4, 324(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t3
-; RV32IM-NEXT: sw a4, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a0
-; RV32IM-NEXT: sw a4, 164(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s4
-; RV32IM-NEXT: sw a4, 160(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s3
-; RV32IM-NEXT: sw a4, 276(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s9
-; RV32IM-NEXT: sw a4, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a7
-; RV32IM-NEXT: sw a4, 320(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, s7
-; RV32IM-NEXT: sw a4, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, a6
-; RV32IM-NEXT: sw a4, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a1, t1
-; RV32IM-NEXT: sw a4, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mv a4, t1
-; RV32IM-NEXT: mul a0, a1, s5
-; RV32IM-NEXT: sw a0, 152(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a1, s8
-; RV32IM-NEXT: sw a1, 272(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 312(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 304(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 300(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
+; RV32IM-NEXT: slli a2, t2, 8
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, t5, 512
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, t1, 512
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, ra, 9
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, t2, 9
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
; RV32IM-NEXT: sw a0, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
+; RV32IM-NEXT: mv a7, t5
+; RV32IM-NEXT: andi a1, t5, 1
+; RV32IM-NEXT: andi a0, t5, 1024
+; RV32IM-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t3, 4
+; RV32IM-NEXT: and a4, t5, t3
+; RV32IM-NEXT: lui s9, 8
+; RV32IM-NEXT: and t0, t5, s9
+; RV32IM-NEXT: lui t4, 16
+; RV32IM-NEXT: and t6, t5, t4
+; RV32IM-NEXT: lui s8, 32
+; RV32IM-NEXT: and s1, t5, s8
+; RV32IM-NEXT: lui s4, 512
+; RV32IM-NEXT: and s3, t5, s4
+; RV32IM-NEXT: lui s5, 1024
+; RV32IM-NEXT: and s11, t5, s5
+; RV32IM-NEXT: lui s7, 2048
+; RV32IM-NEXT: and a0, t5, s7
+; RV32IM-NEXT: lui t5, 4096
+; RV32IM-NEXT: and a2, a7, t5
+; RV32IM-NEXT: lui s0, 8192
+; RV32IM-NEXT: and a3, a7, s0
+; RV32IM-NEXT: lui s6, 131072
+; RV32IM-NEXT: and a5, a7, s6
+; RV32IM-NEXT: lui s2, 262144
+; RV32IM-NEXT: and a6, a7, s2
+; RV32IM-NEXT: lui s10, 524288
+; RV32IM-NEXT: and a7, a7, s10
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a1, t1, 1
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mv a1, t1
+; RV32IM-NEXT: and t3, t1, t3
+; RV32IM-NEXT: mul t1, t2, t3
+; RV32IM-NEXT: sw t1, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, a1, s9
+; RV32IM-NEXT: mul t1, t2, t3
+; RV32IM-NEXT: sw t1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, a1, t4
+; RV32IM-NEXT: mul t1, t2, t3
+; RV32IM-NEXT: sw t1, 180(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, a1, s8
+; RV32IM-NEXT: mul t1, t2, t3
+; RV32IM-NEXT: sw t1, 224(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, a1, s4
+; RV32IM-NEXT: mul s4, t2, t3
+; RV32IM-NEXT: and t3, a1, s5
+; RV32IM-NEXT: mul t1, t2, t3
+; RV32IM-NEXT: sw t1, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, a1, s7
+; RV32IM-NEXT: mul t1, t2, t3
+; RV32IM-NEXT: sw t1, 152(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, a1, t5
+; RV32IM-NEXT: mul t1, t2, t3
+; RV32IM-NEXT: sw t1, 208(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, a1, s0
+; RV32IM-NEXT: mul t1, t2, t3
+; RV32IM-NEXT: sw t1, 232(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, a1, s6
+; RV32IM-NEXT: mul t1, t2, t3
+; RV32IM-NEXT: sw t1, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, a1, s2
+; RV32IM-NEXT: mul t1, t2, t3
+; RV32IM-NEXT: sw t1, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, a1, s10
+; RV32IM-NEXT: mv s2, a1
+; RV32IM-NEXT: mul a1, t2, t3
+; RV32IM-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul t1, t2, a1
+; RV32IM-NEXT: mv t3, ra
+; RV32IM-NEXT: mul s9, ra, a4
+; RV32IM-NEXT: mul ra, t2, a4
+; RV32IM-NEXT: mul a1, t3, t0
+; RV32IM-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, t2, t0
+; RV32IM-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, t3, t6
+; RV32IM-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, t2, t6
+; RV32IM-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, t3, s1
+; RV32IM-NEXT: sw a1, 160(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, t2, s1
+; RV32IM-NEXT: sw a1, 428(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a1, t2, a1
+; RV32IM-NEXT: mul s8, t3, s3
+; RV32IM-NEXT: mul a4, t2, s3
+; RV32IM-NEXT: sw a4, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, t3, s11
+; RV32IM-NEXT: sw a4, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, t2, s11
+; RV32IM-NEXT: sw a4, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, t3, a0
+; RV32IM-NEXT: sw a4, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t2, a0
; RV32IM-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
+; RV32IM-NEXT: mul a0, t3, a2
+; RV32IM-NEXT: sw a0, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t2, a2
+; RV32IM-NEXT: sw a0, 156(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t3, a3
; RV32IM-NEXT: sw a0, 176(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 196(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 296(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 284(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, s10, a0
-; RV32IM-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
+; RV32IM-NEXT: mul a0, t2, a3
+; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul s11, t2, a0
+; RV32IM-NEXT: mul s6, t3, a5
+; RV32IM-NEXT: mul a0, t2, a5
+; RV32IM-NEXT: sw a0, 0(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t3, a6
+; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t2, a6
+; RV32IM-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, t3, a7
; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s11, s10, s11
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s2, s10, s2
-; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
+; RV32IM-NEXT: mul a0, t2, a7
+; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t5, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi t5, t5, -1
+; RV32IM-NEXT: lw t4, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi t4, t4, -1
+; RV32IM-NEXT: slli a5, t3, 10
+; RV32IM-NEXT: and a7, t5, t3
+; RV32IM-NEXT: and t4, t4, t2
+; RV32IM-NEXT: and a2, t5, t2
+; RV32IM-NEXT: slli t2, t2, 10
+; RV32IM-NEXT: andi a6, s2, 1024
+; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: seqz s0, a0
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi s0, s0, -1
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: and a5, s0, a5
+; RV32IM-NEXT: sw a5, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a6, t2
; RV32IM-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, s10, ra
-; RV32IM-NEXT: sw a1, 188(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, s10, a2
-; RV32IM-NEXT: sw a1, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul ra, s10, s6
-; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, s10, a0
-; RV32IM-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, s10, a3
-; RV32IM-NEXT: mul a0, s10, t2
-; RV32IM-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, s0
-; RV32IM-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t2, s10, t0
-; RV32IM-NEXT: mul t4, s10, t4
-; RV32IM-NEXT: mul t1, s10, t5
-; RV32IM-NEXT: mul a0, s10, t6
-; RV32IM-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, s1
-; RV32IM-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, t3
-; RV32IM-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t3, s10, a0
-; RV32IM-NEXT: mul t5, s10, s4
-; RV32IM-NEXT: mul t0, s10, s3
-; RV32IM-NEXT: mul a0, s10, s9
-; RV32IM-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, a7
-; RV32IM-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, s7
-; RV32IM-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, a6
-; RV32IM-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, s10, a4
-; RV32IM-NEXT: mul s5, s10, s5
-; RV32IM-NEXT: mul a4, s10, s8
-; RV32IM-NEXT: lw a1, 732(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and s3, s10, a1
-; RV32IM-NEXT: slli s10, s10, 24
-; RV32IM-NEXT: slli s3, s3, 8
-; RV32IM-NEXT: or a5, s10, s3
-; RV32IM-NEXT: lw s9, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s6, s9, 8
-; RV32IM-NEXT: and s6, s6, a1
-; RV32IM-NEXT: srli s7, s9, 24
-; RV32IM-NEXT: or a6, s6, s7
-; RV32IM-NEXT: and s7, s9, a1
-; RV32IM-NEXT: slli s9, s9, 24
-; RV32IM-NEXT: slli s7, s7, 8
-; RV32IM-NEXT: or a7, s9, s7
-; RV32IM-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, a2, a1
-; RV32IM-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, a1, a2
-; RV32IM-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 212(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, a1, a2
-; RV32IM-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 204(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s4, a1, a2
-; RV32IM-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 192(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, a1, a2
-; RV32IM-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 180(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, a1, a2
-; RV32IM-NEXT: lw a1, 164(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 160(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, a1, a2
-; RV32IM-NEXT: lw a1, 152(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s9, a2, a1
-; RV32IM-NEXT: lw a1, 148(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 144(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, a2, a1
-; RV32IM-NEXT: lw a1, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, a2, a1
-; RV32IM-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 380(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 372(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 364(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 356(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 348(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: sw a1, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: sw a1, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, s11, a1
-; RV32IM-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a1, a2
-; RV32IM-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, s2, a1
-; RV32IM-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor ra, ra, a1
-; RV32IM-NEXT: xor t2, t2, t4
-; RV32IM-NEXT: xor t3, t3, t5
-; RV32IM-NEXT: xor t4, a0, s5
-; RV32IM-NEXT: lw a0, 680(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 344(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, a0, a1
-; RV32IM-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s5, a1, a0
-; RV32IM-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a0, s0, t2
+; RV32IM-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a0, t1
+; RV32IM-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a0, a1
+; RV32IM-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, s11
+; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 328(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 336(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 280(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 340(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 336(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a1, 272(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 200(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a7, a0
+; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 148(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, a0, a1
+; RV32IM-NEXT: lw a0, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s10, a0, a1
+; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, a0, a1
; RV32IM-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 328(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor s9, a0, s9
; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 316(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor s8, a0, s8
+; RV32IM-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, a0, s6
+; RV32IM-NEXT: lw a0, 212(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, t4, a0
+; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 164(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, a0, a1
+; RV32IM-NEXT: lw a0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, a0, s0
+; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, a0, s1
; RV32IM-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or a0, a5, a0
-; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: or a0, a7, a6
-; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a5, t6, s1
-; RV32IM-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, s3, a0
-; RV32IM-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, s4, a0
-; RV32IM-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, s6, a0
-; RV32IM-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, s7, a0
+; RV32IM-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s2, a0, a1
+; RV32IM-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, a0, s4
+; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, a0, a1
+; RV32IM-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, a1, a0
+; RV32IM-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a0, a1
+; RV32IM-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, a0, a1
+; RV32IM-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, a0, a1
; RV32IM-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s8, a0
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s9, a0
-; RV32IM-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a1, a0
+; RV32IM-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 352(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a1, a0
+; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 320(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, a1, a0
; RV32IM-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s0, a0
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s10, a0
+; RV32IM-NEXT: xor a6, a2, a0
+; RV32IM-NEXT: lw a0, 216(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: lw a1, 172(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 144(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: lw a2, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a3
+; RV32IM-NEXT: xor a3, a4, ra
+; RV32IM-NEXT: lw a4, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a5, a4
+; RV32IM-NEXT: lw a5, 0(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw ra, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, ra, a5
+; RV32IM-NEXT: lw ra, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, ra, s5
+; RV32IM-NEXT: sw ra, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 452(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 288(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 408(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 284(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 408(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 268(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 396(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, ra, s5
+; RV32IM-NEXT: sw ra, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 392(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 388(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 388(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 332(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 384(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 384(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 316(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 380(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 300(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: lw s5, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, s5, s7
+; RV32IM-NEXT: sw s7, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s7, 168(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, s10, s7
+; RV32IM-NEXT: sw s7, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s7, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, s11, s7
+; RV32IM-NEXT: sw s7, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s7, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, s9, s7
+; RV32IM-NEXT: sw s7, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, s8, s7
+; RV32IM-NEXT: sw s7, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, s6, s7
+; RV32IM-NEXT: sw s6, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s5, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s5, t6
+; RV32IM-NEXT: sw t6, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 184(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s0, t6
+; RV32IM-NEXT: sw t6, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s1, t6
+; RV32IM-NEXT: sw t6, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s2, t6
+; RV32IM-NEXT: sw t6, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s3, t6
+; RV32IM-NEXT: sw t6, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s4, t6
+; RV32IM-NEXT: sw t6, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a7, t5, a7
+; RV32IM-NEXT: sw a7, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 516(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t0, a7
+; RV32IM-NEXT: sw a7, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t1, a7
+; RV32IM-NEXT: sw a7, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 312(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t2, a7
+; RV32IM-NEXT: sw a7, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 308(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t3, a7
+; RV32IM-NEXT: sw a7, 504(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 292(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t4, a7
+; RV32IM-NEXT: sw a7, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a6, a0
+; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 204(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, a1, a0
+; RV32IM-NEXT: lw a0, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a0
+; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a3, a0
+; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a4, a0
+; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a5, a0
; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lui t0, 61681
+; RV32IM-NEXT: addi t0, t0, -241
+; RV32IM-NEXT: sw t0, 424(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a3, a0, 4
+; RV32IM-NEXT: and a4, a0, t0
+; RV32IM-NEXT: and a3, a3, t0
+; RV32IM-NEXT: slli a4, a4, 4
+; RV32IM-NEXT: or a3, a3, a4
+; RV32IM-NEXT: sw a3, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a4, a0, 4
+; RV32IM-NEXT: and a7, a0, t0
+; RV32IM-NEXT: and a4, a4, t0
+; RV32IM-NEXT: slli a7, a7, 4
+; RV32IM-NEXT: or a0, a4, a7
+; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s9, a0, a1
+; RV32IM-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s8, a1, a0
+; RV32IM-NEXT: lw a0, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, a1, a0
+; RV32IM-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, a1, a0
+; RV32IM-NEXT: lw a0, 324(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, a0, a1
+; RV32IM-NEXT: lw a0, 304(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, s11
-; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, a2, a0
-; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, s2, a0
-; RV32IM-NEXT: xor ra, ra, a3
-; RV32IM-NEXT: xor a0, t2, t1
-; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t3, t0
-; RV32IM-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t4, a4
-; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t5, s5
-; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 640(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, a1, a0
-; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s2, a1, a0
+; RV32IM-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 384(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s0, a0
+; RV32IM-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s1, a0
+; RV32IM-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, ra, a0
+; RV32IM-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a7, a0, 4
+; RV32IM-NEXT: and t4, a0, t0
+; RV32IM-NEXT: and a7, a7, t0
+; RV32IM-NEXT: slli t4, t4, 4
+; RV32IM-NEXT: or s5, a7, t4
+; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t4, a0, 4
+; RV32IM-NEXT: and ra, a0, t0
+; RV32IM-NEXT: and t4, t4, t0
+; RV32IM-NEXT: slli ra, ra, 4
+; RV32IM-NEXT: or s6, t4, ra
+; RV32IM-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s9, a0, a1
-; RV32IM-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, a0, a1
+; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s5, a1, a0
-; RV32IM-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s1, a0
-; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, a1, a0
+; RV32IM-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, a1, a0
+; RV32IM-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, a1, a0
-; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a1, a0
+; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, a1, a0
-; RV32IM-NEXT: xor s4, a5, a6
-; RV32IM-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, a7, a0
-; RV32IM-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t6, a0
-; RV32IM-NEXT: lw a0, 156(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, s6, a0
-; RV32IM-NEXT: lw a0, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, a1, a0
-; RV32IM-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, a0, a1
-; RV32IM-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, a1, a0
-; RV32IM-NEXT: lw a0, 300(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a1, a0
-; RV32IM-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a0, a1
+; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, a1, a0
+; RV32IM-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a7, a1, a0
-; RV32IM-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor t0, a1, a0
-; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, a0, a1
-; RV32IM-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a0, a1
+; RV32IM-NEXT: lw a0, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 516(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a0
+; RV32IM-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a4, 508(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a4, a4, a0
-; RV32IM-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a5, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 504(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a5, a5, a0
-; RV32IM-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a0
-; RV32IM-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a3, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a0
-; RV32IM-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, a0, s10
-; RV32IM-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, s11, a0
-; RV32IM-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor ra, ra, a0
-; RV32IM-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, s6, a1
-; RV32IM-NEXT: lw s6, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 640(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, s6, s7
-; RV32IM-NEXT: sw s6, 640(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s6, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, s7, s6
-; RV32IM-NEXT: sw s6, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s6, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, s7, s6
-; RV32IM-NEXT: lw s6, 644(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, s8, s6
-; RV32IM-NEXT: xor s5, s9, s5
-; RV32IM-NEXT: lw s6, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s1, s6
-; RV32IM-NEXT: lw s6, 624(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, s2, s6
-; RV32IM-NEXT: lw s6, 616(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, s3, s6
-; RV32IM-NEXT: xor s0, s4, s0
-; RV32IM-NEXT: sw s0, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s0, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t3, s0
-; RV32IM-NEXT: lw s0, 324(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t4, s0
-; RV32IM-NEXT: lw s0, 320(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, t5, s0
-; RV32IM-NEXT: xor s0, t6, t2
-; RV32IM-NEXT: lw t2, 312(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, t2
-; RV32IM-NEXT: lw t2, 308(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, a0, s11
+; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a2, a0
+; RV32IM-NEXT: lw a1, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: lw a2, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw ra, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, ra, a2
+; RV32IM-NEXT: lw ra, 468(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s10, ra
+; RV32IM-NEXT: xor s8, s9, s8
+; RV32IM-NEXT: lw s9, 440(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, s4, s9
+; RV32IM-NEXT: lw s9, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, s7, s9
+; RV32IM-NEXT: xor s2, s3, s2
+; RV32IM-NEXT: lw s3, 512(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s0, s3
+; RV32IM-NEXT: lw s3, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s1, s3
+; RV32IM-NEXT: xor t4, t6, t4
+; RV32IM-NEXT: lw t6, 160(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, t5, t6
+; RV32IM-NEXT: lw t6, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, t3, t6
+; RV32IM-NEXT: xor t1, t2, t1
+; RV32IM-NEXT: lw t2, 224(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a7, a7, t2
-; RV32IM-NEXT: lw t2, 304(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw t2, 208(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor t0, t0, t2
-; RV32IM-NEXT: xor a4, t1, a4
-; RV32IM-NEXT: lw t1, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, t1
-; RV32IM-NEXT: lw t1, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t1
-; RV32IM-NEXT: lw t1, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t1
-; RV32IM-NEXT: xor t2, s10, s11
-; RV32IM-NEXT: lw t1, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, ra, t1
-; RV32IM-NEXT: lw t1, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, t1
-; RV32IM-NEXT: lw t1, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, a1, t1
-; RV32IM-NEXT: lui a1, 61681
-; RV32IM-NEXT: addi s4, a1, -241
-; RV32IM-NEXT: lw a1, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s9, a1, 4
-; RV32IM-NEXT: and s10, a1, s4
-; RV32IM-NEXT: and s9, s9, s4
-; RV32IM-NEXT: slli s10, s10, 4
-; RV32IM-NEXT: or s9, s9, s10
-; RV32IM-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s10, a1, 4
-; RV32IM-NEXT: and s11, a1, s4
-; RV32IM-NEXT: and s10, s10, s4
-; RV32IM-NEXT: slli s11, s11, 4
-; RV32IM-NEXT: or s10, s10, s11
-; RV32IM-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, a1, s6
-; RV32IM-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, s7, a1
-; RV32IM-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, s8, a1
-; RV32IM-NEXT: xor s1, s5, s1
-; RV32IM-NEXT: lw a1, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, s2, a1
-; RV32IM-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, s3, a1
-; RV32IM-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s5, a1, 4
-; RV32IM-NEXT: and s11, a1, s4
-; RV32IM-NEXT: and s5, s5, s4
-; RV32IM-NEXT: slli s11, s11, 4
-; RV32IM-NEXT: or s5, s5, s11
-; RV32IM-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s11, a1, 4
-; RV32IM-NEXT: and ra, a1, s4
-; RV32IM-NEXT: and s11, s11, s4
-; RV32IM-NEXT: slli ra, ra, 4
-; RV32IM-NEXT: or s11, s11, ra
-; RV32IM-NEXT: lw a1, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, a1, t3
-; RV32IM-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t4, a1
-; RV32IM-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, t5, a1
-; RV32IM-NEXT: xor a6, s0, a6
-; RV32IM-NEXT: lw a1, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, a1
-; RV32IM-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, a1
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: lw a1, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a1
-; RV32IM-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a1
-; RV32IM-NEXT: xor a5, t2, t6
-; RV32IM-NEXT: lw a1, 720(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a6, a3
+; RV32IM-NEXT: lw a6, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a6
+; RV32IM-NEXT: lw a6, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: xor a0, s11, a0
+; RV32IM-NEXT: lw a6, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a6
+; RV32IM-NEXT: lw a6, 156(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a6
+; RV32IM-NEXT: xor a6, s8, s4
+; RV32IM-NEXT: lw t2, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, s7, t2
+; RV32IM-NEXT: xor t6, s2, s0
+; RV32IM-NEXT: lw s0, 520(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s1, s0
+; RV32IM-NEXT: xor t4, t4, t5
+; RV32IM-NEXT: lw t5, 176(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, t3, t5
+; RV32IM-NEXT: xor a7, t1, a7
+; RV32IM-NEXT: lw t1, 232(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, t1
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: lw a4, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a5, a4
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: lw a1, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, a1
-; RV32IM-NEXT: xor t2, s6, s7
-; RV32IM-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, s8, a1
-; RV32IM-NEXT: xor s0, s1, s2
-; RV32IM-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s3, a1
-; RV32IM-NEXT: xor t3, t3, t4
-; RV32IM-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t5, a1
-; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, t0, a1
-; RV32IM-NEXT: xor a2, a4, a2
-; RV32IM-NEXT: lw a1, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a1
-; RV32IM-NEXT: xor a0, a5, a0
-; RV32IM-NEXT: lw a1, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, t1, a1
-; RV32IM-NEXT: xor a5, t2, t6
-; RV32IM-NEXT: xor s0, s0, s1
-; RV32IM-NEXT: xor t0, t3, t4
-; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: xor a3, a2, a3
-; RV32IM-NEXT: xor a0, a0, a4
+; RV32IM-NEXT: lw a1, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
; RV32IM-NEXT: lui a2, 209715
-; RV32IM-NEXT: addi t5, a2, 819
-; RV32IM-NEXT: srli a4, s9, 2
-; RV32IM-NEXT: and a7, s9, t5
-; RV32IM-NEXT: and a4, a4, t5
-; RV32IM-NEXT: slli a7, a7, 2
-; RV32IM-NEXT: or a4, a4, a7
-; RV32IM-NEXT: srli a7, s10, 2
-; RV32IM-NEXT: and t1, s10, t5
-; RV32IM-NEXT: and a7, a7, t5
+; RV32IM-NEXT: addi s4, a2, 819
+; RV32IM-NEXT: lw a2, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a5, a2, 2
+; RV32IM-NEXT: and t1, a2, s4
+; RV32IM-NEXT: and a5, a5, s4
; RV32IM-NEXT: slli t1, t1, 2
-; RV32IM-NEXT: or a7, a7, t1
-; RV32IM-NEXT: lw a1, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a1
-; RV32IM-NEXT: lw a1, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, s0, a1
-; RV32IM-NEXT: srli t2, s5, 2
-; RV32IM-NEXT: and t3, s5, t5
-; RV32IM-NEXT: and t2, t2, t5
-; RV32IM-NEXT: slli t3, t3, 2
-; RV32IM-NEXT: or t2, t2, t3
-; RV32IM-NEXT: srli t3, s11, 2
-; RV32IM-NEXT: and t4, s11, t5
-; RV32IM-NEXT: and t3, t3, t5
-; RV32IM-NEXT: slli t4, t4, 2
-; RV32IM-NEXT: or t3, t3, t4
-; RV32IM-NEXT: lw a1, 680(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, a1
-; RV32IM-NEXT: lw a1, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, a1
-; RV32IM-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a3, a1
-; RV32IM-NEXT: sw a1, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a5, a5, t1
+; RV32IM-NEXT: lw a2, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t1, a2, 2
+; RV32IM-NEXT: and t5, a2, s4
+; RV32IM-NEXT: and t1, t1, s4
+; RV32IM-NEXT: slli t5, t5, 2
+; RV32IM-NEXT: or t1, t1, t5
+; RV32IM-NEXT: xor a6, a6, t2
+; RV32IM-NEXT: xor t2, t6, s0
+; RV32IM-NEXT: srli t5, s5, 2
+; RV32IM-NEXT: and t6, s5, s4
+; RV32IM-NEXT: and t5, t5, s4
+; RV32IM-NEXT: slli t6, t6, 2
+; RV32IM-NEXT: or t5, t5, t6
+; RV32IM-NEXT: srli t6, s6, 2
+; RV32IM-NEXT: and s0, s6, s4
+; RV32IM-NEXT: and t6, t6, s4
+; RV32IM-NEXT: slli s0, s0, 2
+; RV32IM-NEXT: or t6, t6, s0
+; RV32IM-NEXT: xor t3, t4, t3
+; RV32IM-NEXT: xor a7, a7, t0
+; RV32IM-NEXT: xor a3, a3, a4
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t1, a5
-; RV32IM-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, a6, t0
-; RV32IM-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t4, 349525
-; RV32IM-NEXT: addi t4, t4, 1365
-; RV32IM-NEXT: srli a0, a4, 1
-; RV32IM-NEXT: and a4, a4, t4
-; RV32IM-NEXT: and a0, a0, t4
-; RV32IM-NEXT: slli a4, a4, 1
-; RV32IM-NEXT: or a0, a0, a4
-; RV32IM-NEXT: srli a3, a7, 1
-; RV32IM-NEXT: and a4, a7, t4
-; RV32IM-NEXT: and a3, a3, t4
+; RV32IM-NEXT: lw a1, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a6, a1
+; RV32IM-NEXT: lw a2, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, t2, a2
+; RV32IM-NEXT: lw a2, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, t3, a2
+; RV32IM-NEXT: lw a2, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, a2
+; RV32IM-NEXT: lw a2, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a3, a2
+; RV32IM-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a0, ra
+; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a1, a4, a1
+; RV32IM-NEXT: sw a1, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a7, a6
+; RV32IM-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s6, 349525
+; RV32IM-NEXT: addi s6, s6, 1365
+; RV32IM-NEXT: srli a0, a5, 1
+; RV32IM-NEXT: and a1, a5, s6
+; RV32IM-NEXT: and a0, a0, s6
+; RV32IM-NEXT: slli a1, a1, 1
+; RV32IM-NEXT: or a1, a0, a1
+; RV32IM-NEXT: srli a0, t1, 1
+; RV32IM-NEXT: and a3, t1, s6
+; RV32IM-NEXT: and a0, a0, s6
+; RV32IM-NEXT: slli a3, a3, 1
+; RV32IM-NEXT: or a0, a0, a3
+; RV32IM-NEXT: srli a3, t5, 1
+; RV32IM-NEXT: and a4, t5, s6
+; RV32IM-NEXT: and a3, a3, s6
; RV32IM-NEXT: slli a4, a4, 1
; RV32IM-NEXT: or a4, a3, a4
-; RV32IM-NEXT: srli a1, t2, 1
-; RV32IM-NEXT: and a6, t2, t4
-; RV32IM-NEXT: and a1, a1, t4
-; RV32IM-NEXT: slli a6, a6, 1
-; RV32IM-NEXT: or a6, a1, a6
-; RV32IM-NEXT: srli a1, t3, 1
-; RV32IM-NEXT: and a7, t3, t4
-; RV32IM-NEXT: and a1, a1, t4
-; RV32IM-NEXT: slli a7, a7, 1
-; RV32IM-NEXT: or s11, a1, a7
-; RV32IM-NEXT: andi a1, a4, 2
-; RV32IM-NEXT: sw a1, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a7, a4, 1
-; RV32IM-NEXT: andi a5, a4, 4
-; RV32IM-NEXT: andi t0, a4, 8
-; RV32IM-NEXT: andi t2, a4, 16
-; RV32IM-NEXT: andi t3, a4, 32
-; RV32IM-NEXT: andi t1, a4, 64
-; RV32IM-NEXT: andi a1, a4, 128
-; RV32IM-NEXT: sw a1, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t6, a4, 256
-; RV32IM-NEXT: andi s0, a4, 512
-; RV32IM-NEXT: andi s2, a4, 1024
-; RV32IM-NEXT: lw a2, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and s8, a4, a2
-; RV32IM-NEXT: lui a1, 1
-; RV32IM-NEXT: and s9, a4, a1
-; RV32IM-NEXT: lui s1, 2
-; RV32IM-NEXT: and s1, a4, s1
-; RV32IM-NEXT: lui a1, 4
-; RV32IM-NEXT: and s3, a4, a1
-; RV32IM-NEXT: lui a1, 8
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 700(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli a3, t6, 1
+; RV32IM-NEXT: and a5, t6, s6
+; RV32IM-NEXT: and a3, a3, s6
+; RV32IM-NEXT: slli a5, a5, 1
+; RV32IM-NEXT: or a2, a3, a5
+; RV32IM-NEXT: lw t2, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a3, a0, t2
+; RV32IM-NEXT: mul a3, a1, a3
+; RV32IM-NEXT: lui a6, 1
+; RV32IM-NEXT: and a7, a0, a6
+; RV32IM-NEXT: mul a7, a1, a7
+; RV32IM-NEXT: xor s2, a3, a7
+; RV32IM-NEXT: lui t0, 64
+; RV32IM-NEXT: and a3, a0, t0
+; RV32IM-NEXT: mul a3, a1, a3
+; RV32IM-NEXT: lui t3, 128
+; RV32IM-NEXT: and a7, a0, t3
+; RV32IM-NEXT: mul a7, a1, a7
+; RV32IM-NEXT: xor a3, a3, a7
+; RV32IM-NEXT: lui t5, 4096
+; RV32IM-NEXT: and a7, a0, t5
+; RV32IM-NEXT: mul a7, a1, a7
+; RV32IM-NEXT: lui t4, 8192
+; RV32IM-NEXT: and t1, a0, t4
+; RV32IM-NEXT: mul t1, a1, t1
+; RV32IM-NEXT: xor a7, a7, t1
+; RV32IM-NEXT: and t1, a2, t2
+; RV32IM-NEXT: and t2, a2, a6
+; RV32IM-NEXT: mul t1, a4, t1
+; RV32IM-NEXT: mul t2, a4, t2
+; RV32IM-NEXT: xor a5, t1, t2
+; RV32IM-NEXT: sw a5, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t1, a2, t0
+; RV32IM-NEXT: and t2, a2, t3
+; RV32IM-NEXT: mul t1, a4, t1
+; RV32IM-NEXT: mul t2, a4, t2
+; RV32IM-NEXT: xor a5, t1, t2
+; RV32IM-NEXT: sw a5, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t1, a2, t5
+; RV32IM-NEXT: and t2, a2, t4
+; RV32IM-NEXT: mul t1, a4, t1
+; RV32IM-NEXT: mul t2, a4, t2
+; RV32IM-NEXT: xor a5, t1, t2
+; RV32IM-NEXT: sw a5, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 2
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 1
+; RV32IM-NEXT: and a5, t1, t2
+; RV32IM-NEXT: sw a5, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 4
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 2
+; RV32IM-NEXT: and a5, t1, t2
+; RV32IM-NEXT: sw a5, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 8
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 3
+; RV32IM-NEXT: and a5, t1, t2
+; RV32IM-NEXT: sw a5, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 16
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 4
+; RV32IM-NEXT: and a5, t1, t2
+; RV32IM-NEXT: sw a5, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 32
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 5
+; RV32IM-NEXT: and a5, t1, t2
+; RV32IM-NEXT: sw a5, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 64
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 6
+; RV32IM-NEXT: and a5, t1, t2
+; RV32IM-NEXT: sw a5, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 128
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 7
+; RV32IM-NEXT: and a5, t1, t2
+; RV32IM-NEXT: sw a5, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 256
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 8
+; RV32IM-NEXT: and a5, t1, t2
+; RV32IM-NEXT: sw a5, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 512
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: slli t2, a1, 9
+; RV32IM-NEXT: and a5, t1, t2
+; RV32IM-NEXT: sw a5, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a0, 1
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: lui a5, 2
+; RV32IM-NEXT: and t2, a0, a5
+; RV32IM-NEXT: mul t2, a1, t2
+; RV32IM-NEXT: lui a5, 4
+; RV32IM-NEXT: and t3, a0, a5
+; RV32IM-NEXT: mul a5, a1, t3
+; RV32IM-NEXT: sw a5, 512(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a6, 8
+; RV32IM-NEXT: and t3, a0, a6
+; RV32IM-NEXT: mul a5, a1, t3
+; RV32IM-NEXT: sw a5, 536(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lui s5, 16
-; RV32IM-NEXT: and s5, a4, s5
-; RV32IM-NEXT: lui s6, 32
-; RV32IM-NEXT: and s6, a4, s6
-; RV32IM-NEXT: lui s7, 64
-; RV32IM-NEXT: and s7, a4, s7
-; RV32IM-NEXT: lui s10, 128
-; RV32IM-NEXT: and s10, a4, s10
-; RV32IM-NEXT: lui ra, 256
-; RV32IM-NEXT: and ra, a4, ra
-; RV32IM-NEXT: lui a1, 512
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 1024
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 2048
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 4096
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 8192
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 644(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 16384
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 32768
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 65536
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 131072
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 262144
-; RV32IM-NEXT: and a3, a4, a1
-; RV32IM-NEXT: sw a3, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 524288
-; RV32IM-NEXT: and a3, a4, a3
-; RV32IM-NEXT: lw a4, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, a7
-; RV32IM-NEXT: sw a4, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, a5
-; RV32IM-NEXT: sw a4, 628(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, t0
-; RV32IM-NEXT: sw a4, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, t2
-; RV32IM-NEXT: sw a4, 620(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, t3
-; RV32IM-NEXT: sw a4, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, t1
-; RV32IM-NEXT: sw a4, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, t6
-; RV32IM-NEXT: sw a4, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, s0
-; RV32IM-NEXT: sw a4, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, s2
-; RV32IM-NEXT: sw a4, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, s8
-; RV32IM-NEXT: sw a4, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, s9
-; RV32IM-NEXT: sw a4, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, s1
-; RV32IM-NEXT: sw a4, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, s3
-; RV32IM-NEXT: sw a4, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 640(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, s5
-; RV32IM-NEXT: sw a4, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, s6
-; RV32IM-NEXT: sw a4, 680(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, s7
-; RV32IM-NEXT: sw a4, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, s10
-; RV32IM-NEXT: sw a4, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a4, a0, ra
-; RV32IM-NEXT: sw a4, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 644(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 600(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 644(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a4, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: sw a4, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a0, a3
-; RV32IM-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t2, s11, a2
-; RV32IM-NEXT: lui a0, 1
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 2
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 4
-; RV32IM-NEXT: and s1, s11, a0
-; RV32IM-NEXT: lui a0, 8
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 16
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 32
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 64
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 128
-; RV32IM-NEXT: and s7, s11, a0
-; RV32IM-NEXT: lui a0, 256
-; RV32IM-NEXT: and s10, s11, a0
-; RV32IM-NEXT: lui a0, 512
-; RV32IM-NEXT: and ra, s11, a0
-; RV32IM-NEXT: lui a0, 1024
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 2048
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 4096
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 8192
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 16384
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 32768
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 65536
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 131072
-; RV32IM-NEXT: and a0, s11, a0
+; RV32IM-NEXT: and t3, a0, s5
+; RV32IM-NEXT: mul a5, a1, t3
+; RV32IM-NEXT: sw a5, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s8, 32
+; RV32IM-NEXT: and t3, a0, s8
+; RV32IM-NEXT: mul a5, a1, t3
+; RV32IM-NEXT: sw a5, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t0, 256
+; RV32IM-NEXT: and t3, a0, t0
+; RV32IM-NEXT: mul s0, a1, t3
+; RV32IM-NEXT: lui s7, 512
+; RV32IM-NEXT: and t3, a0, s7
+; RV32IM-NEXT: mul a5, a1, t3
+; RV32IM-NEXT: sw a5, 504(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s9, 1024
+; RV32IM-NEXT: and t3, a0, s9
+; RV32IM-NEXT: mul a5, a1, t3
+; RV32IM-NEXT: sw a5, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s11, 2048
+; RV32IM-NEXT: and t3, a0, s11
+; RV32IM-NEXT: mul a5, a1, t3
+; RV32IM-NEXT: sw a5, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s3, 16384
+; RV32IM-NEXT: and t3, a0, s3
+; RV32IM-NEXT: mul t3, a1, t3
+; RV32IM-NEXT: lui t6, 32768
+; RV32IM-NEXT: and t4, a0, t6
+; RV32IM-NEXT: mul a5, a1, t4
+; RV32IM-NEXT: sw a5, 496(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t5, 65536
+; RV32IM-NEXT: and t4, a0, t5
+; RV32IM-NEXT: mul a5, a1, t4
+; RV32IM-NEXT: sw a5, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui ra, 131072
+; RV32IM-NEXT: and t4, a0, ra
+; RV32IM-NEXT: mul a5, a1, t4
+; RV32IM-NEXT: sw a5, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 262144
+; RV32IM-NEXT: and t4, a0, a5
+; RV32IM-NEXT: mul t4, a1, t4
+; RV32IM-NEXT: sw t4, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s1, 524288
+; RV32IM-NEXT: and t4, a0, s1
+; RV32IM-NEXT: mul t4, a1, t4
+; RV32IM-NEXT: sw t4, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: and t1, t1, a1
+; RV32IM-NEXT: sw t1, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a1, a1, 10
+; RV32IM-NEXT: andi a0, a0, 1024
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: and a0, a0, a1
; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, s11, a1
-; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 524288
-; RV32IM-NEXT: and s2, s11, a0
-; RV32IM-NEXT: andi s8, s11, 2
-; RV32IM-NEXT: andi a0, s11, 1
-; RV32IM-NEXT: andi a1, s11, 4
-; RV32IM-NEXT: andi a2, s11, 8
-; RV32IM-NEXT: andi a3, s11, 16
-; RV32IM-NEXT: andi a4, s11, 32
-; RV32IM-NEXT: andi a5, s11, 64
-; RV32IM-NEXT: andi a7, s11, 128
-; RV32IM-NEXT: andi t0, s11, 256
-; RV32IM-NEXT: andi t1, s11, 512
-; RV32IM-NEXT: andi s11, s11, 1024
-; RV32IM-NEXT: mul s9, a6, s8
-; RV32IM-NEXT: mul s6, a6, a0
-; RV32IM-NEXT: mul s8, a6, a1
-; RV32IM-NEXT: mul s3, a6, a2
-; RV32IM-NEXT: mul s5, a6, a3
-; RV32IM-NEXT: mul s0, a6, a4
-; RV32IM-NEXT: mul a0, a6, a5
-; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, a7
-; RV32IM-NEXT: sw a0, 728(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t6, a6, t0
-; RV32IM-NEXT: mul t3, a6, t1
-; RV32IM-NEXT: mul a0, a6, s11
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, t2
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t1, a6, a0
-; RV32IM-NEXT: mul s1, a6, s1
+; RV32IM-NEXT: xor a0, s2, t2
+; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a3, a3, s0
+; RV32IM-NEXT: sw a3, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a7, t3
+; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a2, 2
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a4, 1
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a2, 4
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a4, 2
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a2, 8
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a4, 3
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a2, 16
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a4, 4
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a2, 32
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a4, 5
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a2, 64
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a4, 6
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a2, 128
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a3, a4, 7
+; RV32IM-NEXT: and a0, a0, a3
+; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a2, 256
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a4, 8
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a1, a2, 512
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a7, a4, 9
+; RV32IM-NEXT: and a0, a1, a7
+; RV32IM-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a1, 2
+; RV32IM-NEXT: and a1, a2, a1
+; RV32IM-NEXT: lui a0, 4
+; RV32IM-NEXT: and a0, a2, a0
+; RV32IM-NEXT: and s2, a2, a6
+; RV32IM-NEXT: and a3, a2, s5
+; RV32IM-NEXT: and a6, a2, s8
+; RV32IM-NEXT: and a7, a2, t0
+; RV32IM-NEXT: and t0, a2, s7
+; RV32IM-NEXT: and t1, a2, s9
+; RV32IM-NEXT: and t2, a2, s11
+; RV32IM-NEXT: and t3, a2, s3
+; RV32IM-NEXT: and t4, a2, t6
+; RV32IM-NEXT: and t5, a2, t5
+; RV32IM-NEXT: and t6, a2, ra
+; RV32IM-NEXT: and s0, a2, a5
+; RV32IM-NEXT: and s1, a2, s1
+; RV32IM-NEXT: andi a5, a2, 1
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: mul a1, a4, a1
+; RV32IM-NEXT: mul s5, a4, a0
+; RV32IM-NEXT: mul s9, a4, s2
+; RV32IM-NEXT: mul a0, a4, a3
+; RV32IM-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, a4, a6
+; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a4, a7
+; RV32IM-NEXT: mul s3, a4, t0
+; RV32IM-NEXT: mul s8, a4, t1
+; RV32IM-NEXT: mul ra, a4, t2
+; RV32IM-NEXT: mul t3, a4, t3
+; RV32IM-NEXT: mul s2, a4, t4
+; RV32IM-NEXT: mul s7, a4, t5
+; RV32IM-NEXT: mul s11, a4, t6
+; RV32IM-NEXT: mul s0, a4, s0
+; RV32IM-NEXT: mul a0, a4, s1
+; RV32IM-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: and t1, a5, a4
+; RV32IM-NEXT: slli a4, a4, 10
+; RV32IM-NEXT: andi a2, a2, 1024
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: and t6, a2, a4
+; RV32IM-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, a0, a1
+; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a0, a7
+; RV32IM-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, a0, t3
+; RV32IM-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a1, a0
+; RV32IM-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a0, a1
; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s11, a6, a0
-; RV32IM-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s7, a6, s7
-; RV32IM-NEXT: mul s10, a6, s10
-; RV32IM-NEXT: mul ra, a6, ra
+; RV32IM-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a0, a1
; RV32IM-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t0, a6, a0
-; RV32IM-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a7, a6, a0
+; RV32IM-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, a0, a1
; RV32IM-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t2, a6, a0
-; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a3, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a0
; RV32IM-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a4, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a0
; RV32IM-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s2
-; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, a1, a0
-; RV32IM-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 624(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a0
+; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, t1, a0
+; RV32IM-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 456(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a3, 604(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a3
-; RV32IM-NEXT: lw a3, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a4
-; RV32IM-NEXT: lw a4, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a5, 584(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: lw a5, 580(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: xor s6, s6, s9
-; RV32IM-NEXT: xor s3, s8, s3
-; RV32IM-NEXT: xor s0, s5, s0
-; RV32IM-NEXT: xor t3, t6, t3
-; RV32IM-NEXT: xor t1, t1, s1
-; RV32IM-NEXT: xor t6, s7, s10
+; RV32IM-NEXT: lw a2, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 440(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, s1
+; RV32IM-NEXT: xor t5, t5, s5
+; RV32IM-NEXT: xor t2, t2, s3
+; RV32IM-NEXT: xor t4, t4, s2
+; RV32IM-NEXT: xor a6, t3, a6
+; RV32IM-NEXT: lw t3, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, t3
+; RV32IM-NEXT: lw t3, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, t3
+; RV32IM-NEXT: lw t3, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, t3
+; RV32IM-NEXT: lw t3, 516(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, t3
+; RV32IM-NEXT: lw t3, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, t3
+; RV32IM-NEXT: xor a0, t1, a0
+; RV32IM-NEXT: lw t1, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, t1
+; RV32IM-NEXT: lw t1, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, t1
+; RV32IM-NEXT: xor t1, t5, s9
+; RV32IM-NEXT: xor t2, t2, s8
+; RV32IM-NEXT: xor t3, t4, s7
+; RV32IM-NEXT: xor a6, a6, a7
+; RV32IM-NEXT: lw a7, 500(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a7, t0, a7
-; RV32IM-NEXT: xor a0, s2, a0
-; RV32IM-NEXT: lw t0, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, t0
-; RV32IM-NEXT: lw t0, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t0
-; RV32IM-NEXT: lw t0, 640(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw t0, 552(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a3, a3, t0
-; RV32IM-NEXT: lw t0, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw t0, 532(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a4, a4, t0
-; RV32IM-NEXT: lw t0, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw t0, 520(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a5, a5, t0
-; RV32IM-NEXT: xor t0, s6, s3
-; RV32IM-NEXT: lw a6, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, s0, a6
-; RV32IM-NEXT: lw a6, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t3, a6
-; RV32IM-NEXT: xor t1, t1, s11
-; RV32IM-NEXT: xor t6, t6, ra
-; RV32IM-NEXT: xor a7, a7, t2
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, t6
+; RV32IM-NEXT: lw a2, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, t1, a2
+; RV32IM-NEXT: xor t0, t2, ra
+; RV32IM-NEXT: xor t1, t3, s11
+; RV32IM-NEXT: xor a6, a6, a7
+; RV32IM-NEXT: lw a7, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a7
+; RV32IM-NEXT: lw a7, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: lw a1, 444(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: lw a2, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: lw a3, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a4, a3
-; RV32IM-NEXT: lw a4, 644(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a5, a4
-; RV32IM-NEXT: xor a5, t0, s0
-; RV32IM-NEXT: lw a6, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t3, a6
-; RV32IM-NEXT: lw a6, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, a6
-; RV32IM-NEXT: lw a6, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t6, a6
-; RV32IM-NEXT: lw a6, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, a6
-; RV32IM-NEXT: lw t3, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, t3
-; RV32IM-NEXT: lw t3, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, t3
-; RV32IM-NEXT: lw t3, 680(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t3
-; RV32IM-NEXT: lw t3, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t3
-; RV32IM-NEXT: lw t3, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, t3
-; RV32IM-NEXT: lw a6, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: lw a6, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, a6
-; RV32IM-NEXT: lw a6, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, a6
-; RV32IM-NEXT: lw a6, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t2, a6
-; RV32IM-NEXT: lw a6, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, a6
-; RV32IM-NEXT: lw t3, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t3
-; RV32IM-NEXT: lw t3, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, t3
-; RV32IM-NEXT: lw a6, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, a6
-; RV32IM-NEXT: lw a6, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, a6
+; RV32IM-NEXT: xor a2, t1, s0
+; RV32IM-NEXT: lw a7, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: lw a7, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a7
+; RV32IM-NEXT: xor a3, a6, a3
; RV32IM-NEXT: xor a1, a0, a1
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a4, a2
-; RV32IM-NEXT: xor a4, a5, t0
-; RV32IM-NEXT: xor a4, a4, t1
-; RV32IM-NEXT: lw a6, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, a6
-; RV32IM-NEXT: xor a1, a1, a3
-; RV32IM-NEXT: lw a3, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a3
-; RV32IM-NEXT: xor a3, a4, t2
-; RV32IM-NEXT: lw a4, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a7, a4
-; RV32IM-NEXT: lw t0, 732(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a6, a1, t0
-; RV32IM-NEXT: srli a7, a1, 8
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: and a2, a3, t0
-; RV32IM-NEXT: xor a4, a3, a4
-; RV32IM-NEXT: srli a3, a3, 8
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: xor a1, a1, t0
+; RV32IM-NEXT: lw t0, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a4, a3, t0
+; RV32IM-NEXT: srli a7, a3, 8
+; RV32IM-NEXT: xor a3, a3, a5
+; RV32IM-NEXT: and a5, a1, t0
+; RV32IM-NEXT: xor a2, a1, a2
+; RV32IM-NEXT: srli a1, a1, 8
; RV32IM-NEXT: and a7, a7, t0
-; RV32IM-NEXT: and a3, a3, t0
+; RV32IM-NEXT: and a1, a1, t0
+; RV32IM-NEXT: slli a6, a6, 24
+; RV32IM-NEXT: slli a4, a4, 8
+; RV32IM-NEXT: or a4, a6, a4
+; RV32IM-NEXT: srli a3, a3, 24
+; RV32IM-NEXT: or a3, a7, a3
; RV32IM-NEXT: slli a0, a0, 24
-; RV32IM-NEXT: slli a6, a6, 8
-; RV32IM-NEXT: or a0, a0, a6
-; RV32IM-NEXT: srli a1, a1, 24
-; RV32IM-NEXT: or a1, a7, a1
-; RV32IM-NEXT: slli a5, a5, 24
-; RV32IM-NEXT: slli a2, a2, 8
-; RV32IM-NEXT: or a2, a5, a2
-; RV32IM-NEXT: srli a4, a4, 24
-; RV32IM-NEXT: or a3, a3, a4
+; RV32IM-NEXT: slli a5, a5, 8
+; RV32IM-NEXT: or a0, a0, a5
+; RV32IM-NEXT: srli a2, a2, 24
+; RV32IM-NEXT: or a1, a1, a2
+; RV32IM-NEXT: or a3, a4, a3
; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: or a2, a2, a3
-; RV32IM-NEXT: srli a1, a0, 4
+; RV32IM-NEXT: srli a1, a3, 4
+; RV32IM-NEXT: lw a4, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a2, a3, a4
+; RV32IM-NEXT: srli a3, a0, 4
+; RV32IM-NEXT: and a0, a0, a4
+; RV32IM-NEXT: and a1, a1, a4
+; RV32IM-NEXT: and a3, a3, a4
+; RV32IM-NEXT: slli a2, a2, 4
+; RV32IM-NEXT: slli a0, a0, 4
+; RV32IM-NEXT: or a1, a1, a2
+; RV32IM-NEXT: or a0, a3, a0
+; RV32IM-NEXT: srli a2, a1, 2
+; RV32IM-NEXT: and a1, a1, s4
+; RV32IM-NEXT: srli a3, a0, 2
; RV32IM-NEXT: and a0, a0, s4
-; RV32IM-NEXT: srli a3, a2, 4
; RV32IM-NEXT: and a2, a2, s4
-; RV32IM-NEXT: and a1, a1, s4
; RV32IM-NEXT: and a3, a3, s4
-; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: slli a2, a2, 4
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: or a2, a3, a2
-; RV32IM-NEXT: srli a1, a0, 2
-; RV32IM-NEXT: and a0, a0, t5
-; RV32IM-NEXT: srli a3, a2, 2
-; RV32IM-NEXT: and a2, a2, t5
-; RV32IM-NEXT: and a1, a1, t5
-; RV32IM-NEXT: and a3, a3, t5
+; RV32IM-NEXT: slli a1, a1, 2
+; RV32IM-NEXT: or a1, a2, a1
+; RV32IM-NEXT: lui a2, 349525
+; RV32IM-NEXT: addi a2, a2, 1364
; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: lui a1, 349525
-; RV32IM-NEXT: addi a1, a1, 1364
-; RV32IM-NEXT: slli a2, a2, 2
-; RV32IM-NEXT: or a2, a3, a2
-; RV32IM-NEXT: srli a3, a0, 1
-; RV32IM-NEXT: and a0, a0, t4
-; RV32IM-NEXT: and a4, a2, t4
-; RV32IM-NEXT: srli a2, a2, 1
-; RV32IM-NEXT: and a3, a3, a1
-; RV32IM-NEXT: and a1, a2, a1
-; RV32IM-NEXT: slli a0, a0, 1
; RV32IM-NEXT: or a0, a3, a0
-; RV32IM-NEXT: slli a4, a4, 1
-; RV32IM-NEXT: or a1, a1, a4
+; RV32IM-NEXT: srli a3, a1, 1
+; RV32IM-NEXT: and a1, a1, s6
+; RV32IM-NEXT: and a4, a0, s6
; RV32IM-NEXT: srli a0, a0, 1
-; RV32IM-NEXT: lw s0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, a0, s0
+; RV32IM-NEXT: and a3, a3, a2
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: slli a1, a1, 1
+; RV32IM-NEXT: or a1, a3, a1
+; RV32IM-NEXT: slli a4, a4, 1
+; RV32IM-NEXT: or a0, a0, a4
; RV32IM-NEXT: srli a1, a1, 1
-; RV32IM-NEXT: lw s1, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, a1, s1
-; RV32IM-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s3, 720(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, a1, s0
+; RV32IM-NEXT: srli a0, a0, 1
+; RV32IM-NEXT: lw s1, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, a0, s1
+; RV32IM-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s3, 600(sp) # 4-byte Folded Reload
; RV32IM-NEXT: sw s3, 0(a0)
; RV32IM-NEXT: sw s0, 4(a0)
-; RV32IM-NEXT: lw s2, 724(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s2, 604(sp) # 4-byte Folded Reload
; RV32IM-NEXT: sw s2, 8(a0)
; RV32IM-NEXT: sw s1, 12(a0)
-; RV32IM-NEXT: addi a0, sp, 736
-; RV32IM-NEXT: sw s3, 736(sp)
-; RV32IM-NEXT: sw s0, 740(sp)
-; RV32IM-NEXT: sw s2, 744(sp)
-; RV32IM-NEXT: sw s1, 748(sp)
+; RV32IM-NEXT: addi a0, sp, 624
+; RV32IM-NEXT: sw s3, 624(sp)
+; RV32IM-NEXT: sw s0, 628(sp)
+; RV32IM-NEXT: sw s2, 632(sp)
+; RV32IM-NEXT: sw s1, 636(sp)
; RV32IM-NEXT: call vector_use
-; RV32IM-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
; RV32IM-NEXT: sw s3, 0(a0)
; RV32IM-NEXT: sw s0, 4(a0)
; RV32IM-NEXT: sw s2, 8(a0)
; RV32IM-NEXT: sw s1, 12(a0)
-; RV32IM-NEXT: lw ra, 812(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s0, 808(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 804(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s2, 800(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s3, 796(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s4, 792(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s5, 788(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 784(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 780(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s8, 776(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s9, 772(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s10, 768(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s11, 764(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 816
+; RV32IM-NEXT: lw ra, 700(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 696(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 692(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s2, 688(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s3, 684(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s4, 680(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 676(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s6, 672(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s7, 668(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s8, 664(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s9, 660(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 656(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s11, 652(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 704
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: mul_use_commutative_clmul_v2i64:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: addi sp, sp, -960
-; RV64IM-NEXT: sd ra, 952(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s0, 944(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s1, 936(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s2, 928(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s3, 920(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s4, 912(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s5, 904(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s6, 896(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s7, 888(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s8, 880(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s9, 872(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s10, 864(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s11, 856(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd a5, 744(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd a4, 736(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv t2, a3
-; RV64IM-NEXT: mv a3, a2
-; RV64IM-NEXT: andi t5, a2, 2
-; RV64IM-NEXT: andi s0, a2, 1
-; RV64IM-NEXT: andi a7, a2, 4
-; RV64IM-NEXT: andi t1, a2, 8
-; RV64IM-NEXT: andi a2, a2, 16
-; RV64IM-NEXT: andi t6, a3, 32
-; RV64IM-NEXT: andi a5, a3, 64
-; RV64IM-NEXT: andi t0, a3, 128
-; RV64IM-NEXT: andi t3, a3, 256
-; RV64IM-NEXT: andi a6, a3, 512
-; RV64IM-NEXT: andi t4, a3, 1024
-; RV64IM-NEXT: li a4, 1
-; RV64IM-NEXT: lui s6, 4
-; RV64IM-NEXT: lui s7, 8
-; RV64IM-NEXT: lui s8, 128
-; RV64IM-NEXT: lui s9, 256
-; RV64IM-NEXT: lui s10, 8192
-; RV64IM-NEXT: lui s11, 16384
-; RV64IM-NEXT: andi s1, t2, 2
-; RV64IM-NEXT: andi s2, t2, 1
-; RV64IM-NEXT: andi s3, t2, 4
-; RV64IM-NEXT: andi s4, t2, 8
-; RV64IM-NEXT: mul t5, a0, t5
-; RV64IM-NEXT: mul s0, a0, s0
-; RV64IM-NEXT: xor t5, s0, t5
-; RV64IM-NEXT: andi s0, t2, 16
-; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: mul t1, a0, t1
-; RV64IM-NEXT: xor a7, a7, t1
-; RV64IM-NEXT: andi t1, t2, 32
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: mul t6, a0, t6
-; RV64IM-NEXT: xor a2, a2, t6
-; RV64IM-NEXT: andi t6, t2, 128
-; RV64IM-NEXT: mul t0, a0, t0
-; RV64IM-NEXT: mul t3, a0, t3
-; RV64IM-NEXT: xor t0, t0, t3
-; RV64IM-NEXT: andi t3, t2, 256
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: mul s5, a0, a6
-; RV64IM-NEXT: mul a6, a0, t4
-; RV64IM-NEXT: sd a6, 728(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a6, a1, s1
-; RV64IM-NEXT: mul t4, a1, s2
-; RV64IM-NEXT: xor a6, t4, a6
-; RV64IM-NEXT: sd a6, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a6, a4, 11
-; RV64IM-NEXT: sd a6, 760(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul t4, a1, s3
-; RV64IM-NEXT: mul s1, a1, s4
-; RV64IM-NEXT: xor a6, t4, s1
-; RV64IM-NEXT: sd a6, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t4, a3, s6
-; RV64IM-NEXT: mul s0, a1, s0
-; RV64IM-NEXT: mul t1, a1, t1
-; RV64IM-NEXT: xor a6, s0, t1
-; RV64IM-NEXT: sd a6, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t1, a3, s7
-; RV64IM-NEXT: mul t6, a1, t6
-; RV64IM-NEXT: mul t3, a1, t3
-; RV64IM-NEXT: xor a6, t6, t3
-; RV64IM-NEXT: sd a6, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t3, a3, s8
-; RV64IM-NEXT: xor a6, t5, a7
-; RV64IM-NEXT: sd a6, 720(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a7, a3, s9
-; RV64IM-NEXT: xor a5, a2, a5
-; RV64IM-NEXT: sd a5, 712(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a5, a3, s10
-; RV64IM-NEXT: xor a6, t0, s5
-; RV64IM-NEXT: sd a6, 704(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t0, a3, s11
-; RV64IM-NEXT: mul a2, a0, t4
-; RV64IM-NEXT: mul t1, a0, t1
-; RV64IM-NEXT: xor a6, a2, t1
-; RV64IM-NEXT: sd a6, 696(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 31
-; RV64IM-NEXT: sd a2, 752(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul t1, a0, t3
+; RV64IM-NEXT: addi sp, sp, -1104
+; RV64IM-NEXT: sd ra, 1096(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s0, 1088(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 1080(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s2, 1072(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s3, 1064(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s4, 1056(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s5, 1048(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s6, 1040(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s7, 1032(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s8, 1024(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s9, 1016(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s10, 1008(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s11, 1000(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a5, 872(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a4, 864(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s9, a0, 1
+; RV64IM-NEXT: andi s10, a2, 2
+; RV64IM-NEXT: andi a4, a2, 1
+; RV64IM-NEXT: slli s5, a0, 2
+; RV64IM-NEXT: andi s7, a2, 4
+; RV64IM-NEXT: slli s3, a0, 3
+; RV64IM-NEXT: andi s6, a2, 8
+; RV64IM-NEXT: slli t0, a0, 4
+; RV64IM-NEXT: andi t4, a2, 16
+; RV64IM-NEXT: slli a6, a0, 5
+; RV64IM-NEXT: andi t2, a2, 32
+; RV64IM-NEXT: slli a7, a0, 6
+; RV64IM-NEXT: andi t6, a2, 64
+; RV64IM-NEXT: slli t1, a0, 7
+; RV64IM-NEXT: andi s1, a2, 128
+; RV64IM-NEXT: slli t3, a0, 8
+; RV64IM-NEXT: andi s2, a2, 256
+; RV64IM-NEXT: slli t5, a0, 9
+; RV64IM-NEXT: andi s4, a2, 512
+; RV64IM-NEXT: slli s0, a0, 10
+; RV64IM-NEXT: andi s8, a2, 1024
+; RV64IM-NEXT: li a5, 1
+; RV64IM-NEXT: seqz s10, s10
+; RV64IM-NEXT: addi s10, s10, -1
+; RV64IM-NEXT: and s9, s10, s9
+; RV64IM-NEXT: sd s9, 856(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s9, 16
+; RV64IM-NEXT: seqz s7, s7
+; RV64IM-NEXT: addi s7, s7, -1
+; RV64IM-NEXT: and s5, s7, s5
+; RV64IM-NEXT: sd s5, 848(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s7, 32
+; RV64IM-NEXT: seqz s5, s6
+; RV64IM-NEXT: addi s5, s5, -1
+; RV64IM-NEXT: and s3, s5, s3
+; RV64IM-NEXT: sd s3, 840(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s5, 2048
+; RV64IM-NEXT: seqz t4, t4
+; RV64IM-NEXT: addi t4, t4, -1
+; RV64IM-NEXT: and t0, t4, t0
+; RV64IM-NEXT: sd t0, 816(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s6, 4096
+; RV64IM-NEXT: seqz a4, a4
+; RV64IM-NEXT: seqz t0, t2
+; RV64IM-NEXT: seqz t2, t6
+; RV64IM-NEXT: seqz t4, s1
+; RV64IM-NEXT: seqz t6, s2
+; RV64IM-NEXT: seqz s1, s4
+; RV64IM-NEXT: seqz s2, s8
+; RV64IM-NEXT: addi t0, t0, -1
+; RV64IM-NEXT: and a6, t0, a6
+; RV64IM-NEXT: sd a6, 800(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 11
+; RV64IM-NEXT: sd a6, 992(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: addi t2, t2, -1
+; RV64IM-NEXT: and a6, t2, a7
+; RV64IM-NEXT: sd a6, 824(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a6, a2, s9
+; RV64IM-NEXT: addi t4, t4, -1
+; RV64IM-NEXT: and a7, t4, t1
+; RV64IM-NEXT: sd a7, 792(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, a2, s7
+; RV64IM-NEXT: addi t6, t6, -1
+; RV64IM-NEXT: and t0, t6, t3
+; RV64IM-NEXT: sd t0, 784(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t0, a2, s5
+; RV64IM-NEXT: addi s1, s1, -1
+; RV64IM-NEXT: and t1, s1, t5
+; RV64IM-NEXT: sd t1, 808(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t1, a2, s6
+; RV64IM-NEXT: addi s2, s2, -1
+; RV64IM-NEXT: and t2, s2, s0
+; RV64IM-NEXT: sd t2, 832(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t2, a5, 31
+; RV64IM-NEXT: sd t2, 984(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a6, a0, a6
; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: xor a6, t1, a7
-; RV64IM-NEXT: sd a6, 688(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t3, a4, 32
-; RV64IM-NEXT: sd t3, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: mul a7, a0, t0
-; RV64IM-NEXT: xor a5, a5, a7
-; RV64IM-NEXT: sd a5, 680(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s2, a4, 33
-; RV64IM-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s4, a4, 34
-; RV64IM-NEXT: sd s4, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s7, a4, 35
-; RV64IM-NEXT: sd s7, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s11, a4, 36
-; RV64IM-NEXT: sd s11, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a5, a4, 37
-; RV64IM-NEXT: sd a5, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a6, a4, 38
-; RV64IM-NEXT: sd a6, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a7, a4, 39
-; RV64IM-NEXT: sd a7, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t0, a4, 40
-; RV64IM-NEXT: sd t0, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t4, a4, 41
+; RV64IM-NEXT: xor a6, a6, a7
+; RV64IM-NEXT: sd a6, 776(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 32
+; RV64IM-NEXT: sd a6, 976(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a6, a0, t0
+; RV64IM-NEXT: mul a7, a0, t1
+; RV64IM-NEXT: xor a6, a6, a7
+; RV64IM-NEXT: sd a6, 768(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t0, a5, 33
+; RV64IM-NEXT: sd t0, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t2, a5, 34
+; RV64IM-NEXT: sd t2, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t3, a5, 35
+; RV64IM-NEXT: sd t3, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t4, a5, 36
; RV64IM-NEXT: sd t4, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t5, a4, 42
+; RV64IM-NEXT: slli t5, a5, 37
; RV64IM-NEXT: sd t5, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli t6, a4, 43
+; RV64IM-NEXT: slli t6, a5, 38
; RV64IM-NEXT: sd t6, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s0, a4, 44
+; RV64IM-NEXT: slli s0, a5, 39
; RV64IM-NEXT: sd s0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s1, a4, 45
+; RV64IM-NEXT: slli s1, a5, 40
; RV64IM-NEXT: sd s1, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s3, a4, 46
-; RV64IM-NEXT: sd s3, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s5, a4, 47
-; RV64IM-NEXT: sd s5, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s6, a4, 48
-; RV64IM-NEXT: sd s6, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s8, a4, 49
-; RV64IM-NEXT: sd s8, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s9, a4, 50
-; RV64IM-NEXT: sd s9, 240(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s10, a4, 51
-; RV64IM-NEXT: sd s10, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli ra, a4, 52
-; RV64IM-NEXT: sd ra, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 53
-; RV64IM-NEXT: sd a2, 768(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 54
-; RV64IM-NEXT: sd a2, 776(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 55
-; RV64IM-NEXT: sd a2, 784(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 56
-; RV64IM-NEXT: sd a2, 792(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 57
-; RV64IM-NEXT: sd a2, 800(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 58
-; RV64IM-NEXT: sd a2, 808(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 59
-; RV64IM-NEXT: sd a2, 816(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 60
-; RV64IM-NEXT: sd a2, 824(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a2, a4, 61
-; RV64IM-NEXT: sd a2, 840(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a4, a4, 62
-; RV64IM-NEXT: sd a4, 832(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: li a4, -1
-; RV64IM-NEXT: slli a4, a4, 63
-; RV64IM-NEXT: sd a4, 848(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 1
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 592(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 2
-; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: slli s2, a5, 41
+; RV64IM-NEXT: sd s2, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s3, a5, 42
+; RV64IM-NEXT: sd s3, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s4, a5, 43
+; RV64IM-NEXT: sd s4, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s5, a5, 44
+; RV64IM-NEXT: sd s5, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s6, a5, 45
+; RV64IM-NEXT: sd s6, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s7, a5, 46
+; RV64IM-NEXT: sd s7, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s8, a5, 47
+; RV64IM-NEXT: sd s8, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s9, a5, 48
+; RV64IM-NEXT: sd s9, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s10, a5, 49
+; RV64IM-NEXT: sd s10, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s11, a5, 50
+; RV64IM-NEXT: sd s11, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli ra, a5, 51
+; RV64IM-NEXT: sd ra, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 52
+; RV64IM-NEXT: sd a6, 968(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 53
+; RV64IM-NEXT: sd a6, 960(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 54
+; RV64IM-NEXT: sd a6, 952(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 55
+; RV64IM-NEXT: sd a6, 944(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 56
+; RV64IM-NEXT: sd a6, 936(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 57
+; RV64IM-NEXT: sd a6, 928(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 58
+; RV64IM-NEXT: sd a6, 920(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 59
+; RV64IM-NEXT: sd a6, 912(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 60
+; RV64IM-NEXT: sd a6, 904(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a6, a5, 61
+; RV64IM-NEXT: sd a6, 896(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a5, a5, 62
+; RV64IM-NEXT: sd a5, 888(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: li a5, -1
+; RV64IM-NEXT: slli a5, a5, 63
+; RV64IM-NEXT: sd a5, 880(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: addi a4, a4, -1
+; RV64IM-NEXT: sd a4, 688(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 1
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 624(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 2
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 4
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 720(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 8
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 752(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 64
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 584(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 128
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 664(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 256
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 712(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 512
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 736(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 1024
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 760(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 8192
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 544(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 16384
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 600(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 32768
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 672(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 65536
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 704(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 131072
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 728(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a5, 262144
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 744(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a5, 992(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: ld a4, 984(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, a2, a4
+; RV64IM-NEXT: ld a4, 976(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a7, a2, a4
+; RV64IM-NEXT: and t0, a2, t0
+; RV64IM-NEXT: and t1, a2, t2
+; RV64IM-NEXT: and t2, a2, t3
+; RV64IM-NEXT: and t3, a2, t4
+; RV64IM-NEXT: and t4, a2, t5
+; RV64IM-NEXT: and t5, a2, t6
+; RV64IM-NEXT: and t6, a2, s0
+; RV64IM-NEXT: and s0, a2, s1
+; RV64IM-NEXT: and s1, a2, s2
+; RV64IM-NEXT: and s2, a2, s3
+; RV64IM-NEXT: and s3, a2, s4
+; RV64IM-NEXT: and s4, a2, s5
+; RV64IM-NEXT: and s5, a2, s6
+; RV64IM-NEXT: and s6, a2, s7
+; RV64IM-NEXT: and s7, a2, s8
+; RV64IM-NEXT: and s8, a2, s9
+; RV64IM-NEXT: and s9, a2, s10
+; RV64IM-NEXT: and s10, a2, s11
+; RV64IM-NEXT: and s11, a2, ra
+; RV64IM-NEXT: ld a4, 968(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and ra, a2, a4
+; RV64IM-NEXT: ld a4, 960(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 656(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 952(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 936(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 928(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 496(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 920(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 912(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 904(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 896(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 888(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a2, a4
+; RV64IM-NEXT: sd a4, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 880(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a2, a2, a4
+; RV64IM-NEXT: ld a4, 688(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a4, a0
+; RV64IM-NEXT: sd a4, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, a5
+; RV64IM-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, a6
+; RV64IM-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, a7
+; RV64IM-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, t0
+; RV64IM-NEXT: sd a4, 480(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, t1
+; RV64IM-NEXT: sd a5, 512(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, t2
+; RV64IM-NEXT: sd a5, 560(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, t3
+; RV64IM-NEXT: sd a5, 592(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, t4
+; RV64IM-NEXT: sd a5, 640(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, t5
+; RV64IM-NEXT: sd a5, 688(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, t6
+; RV64IM-NEXT: sd a4, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, s0
+; RV64IM-NEXT: sd a4, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, s1
+; RV64IM-NEXT: sd a4, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, s2
+; RV64IM-NEXT: sd a4, 504(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, s3
+; RV64IM-NEXT: sd a5, 536(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, s4
+; RV64IM-NEXT: sd a5, 568(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, s5
+; RV64IM-NEXT: sd a5, 616(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, s6
+; RV64IM-NEXT: sd a5, 648(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, s7
+; RV64IM-NEXT: sd a5, 680(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, s8
+; RV64IM-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, s9
+; RV64IM-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, s10
+; RV64IM-NEXT: sd a4, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a4, a0, s11
+; RV64IM-NEXT: sd a4, 488(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a0, ra
+; RV64IM-NEXT: sd a5, 520(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 656(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 640(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 16
-; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: sd a4, 552(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 632(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a4, a0, a4
; RV64IM-NEXT: sd a4, 576(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 32
-; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: ld a4, 608(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 64
-; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 528(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 664(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 512
-; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 496(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 1024
-; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: sd a4, 656(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 472(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 616(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 2048
-; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: sd a4, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 416(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 648(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 4096
-; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: sd a4, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 360(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 672(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 32768
-; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 352(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a4, a0, a4
; RV64IM-NEXT: sd a4, 472(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 65536
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 544(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 131072
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 624(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a4, 262144
-; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: ld a4, 344(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: sd a4, 656(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a4, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a4, a3, a4
-; RV64IM-NEXT: ld a2, 752(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t1, a3, a2
-; RV64IM-NEXT: and a2, a3, t3
-; RV64IM-NEXT: sd a2, 608(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t3, a3, s2
-; RV64IM-NEXT: and s2, a3, s4
-; RV64IM-NEXT: and s4, a3, s7
-; RV64IM-NEXT: and s7, a3, s11
-; RV64IM-NEXT: and s11, a3, a5
-; RV64IM-NEXT: and a5, a3, a6
-; RV64IM-NEXT: and a6, a3, a7
-; RV64IM-NEXT: and a7, a3, t0
-; RV64IM-NEXT: and t0, a3, t4
-; RV64IM-NEXT: and t4, a3, t5
-; RV64IM-NEXT: and t5, a3, t6
-; RV64IM-NEXT: and t6, a3, s0
-; RV64IM-NEXT: and s0, a3, s1
-; RV64IM-NEXT: and s1, a3, s3
-; RV64IM-NEXT: and s3, a3, s5
-; RV64IM-NEXT: and s5, a3, s6
-; RV64IM-NEXT: and s6, a3, s8
-; RV64IM-NEXT: and s8, a3, s9
-; RV64IM-NEXT: and s9, a3, s10
-; RV64IM-NEXT: and s10, a3, ra
-; RV64IM-NEXT: ld a2, 768(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and ra, a3, a2
-; RV64IM-NEXT: ld a2, 776(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 600(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 784(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 512(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 792(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 496(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 800(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 456(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 808(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 816(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 824(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 840(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 832(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a2, 848(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, a3, a2
-; RV64IM-NEXT: mul a3, a0, a4
-; RV64IM-NEXT: sd a3, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, t1
-; RV64IM-NEXT: sd a4, 568(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 608(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: sd a3, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, t3
-; RV64IM-NEXT: sd a3, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, s2
-; RV64IM-NEXT: sd a3, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s4
-; RV64IM-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s7
-; RV64IM-NEXT: sd a4, 488(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s11
-; RV64IM-NEXT: sd a4, 536(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, a5
-; RV64IM-NEXT: sd a4, 584(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, a6
-; RV64IM-NEXT: sd a3, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, a7
-; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, t0
-; RV64IM-NEXT: sd a3, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, t4
-; RV64IM-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, t5
-; RV64IM-NEXT: sd a4, 464(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, t6
-; RV64IM-NEXT: sd a4, 504(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s0
-; RV64IM-NEXT: sd a4, 552(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s1
-; RV64IM-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, s3
-; RV64IM-NEXT: sd a3, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, s5
-; RV64IM-NEXT: sd a3, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a0, s6
-; RV64IM-NEXT: sd a3, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s8
-; RV64IM-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s9
-; RV64IM-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, s10
-; RV64IM-NEXT: sd a4, 480(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a4, a0, ra
-; RV64IM-NEXT: sd a4, 520(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 600(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a3
-; RV64IM-NEXT: sd a4, 560(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 512(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a3
-; RV64IM-NEXT: sd a4, 600(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 496(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: sd a3, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 456(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: sd a3, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: sd a3, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: sd a3, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a3
-; RV64IM-NEXT: sd a4, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a3
-; RV64IM-NEXT: sd a4, 456(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, a0, a3
; RV64IM-NEXT: sd a4, 496(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a0, a0, a2
-; RV64IM-NEXT: sd a0, 512(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a0, a2
+; RV64IM-NEXT: sd a0, 528(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 2
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 1
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 360(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 4
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 2
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 8
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 3
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 344(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 16
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 4
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 32
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 5
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 64
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 6
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 128
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 7
+; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a0, t2, 64
-; RV64IM-NEXT: mul a0, a1, a0
-; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a2, a0
-; RV64IM-NEXT: sd a0, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a0, t2, 512
-; RV64IM-NEXT: mul a0, a1, a0
-; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a2, a0
-; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a0, 4
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: lui a2, 8
-; RV64IM-NEXT: and a2, t2, a2
-; RV64IM-NEXT: mul a0, a1, a0
-; RV64IM-NEXT: mul a2, a1, a2
-; RV64IM-NEXT: xor a0, a0, a2
+; RV64IM-NEXT: andi a0, a3, 256
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 8
+; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a0, 128
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: lui a2, 256
-; RV64IM-NEXT: and a2, t2, a2
+; RV64IM-NEXT: andi a0, a3, 512
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 9
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a0, a3, 1024
+; RV64IM-NEXT: seqz a0, a0
+; RV64IM-NEXT: addi a0, a0, -1
+; RV64IM-NEXT: slli a2, a1, 10
+; RV64IM-NEXT: and a0, a0, a2
+; RV64IM-NEXT: sd a0, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a0, 16
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: lui a2, 32
+; RV64IM-NEXT: and a2, a3, a2
; RV64IM-NEXT: mul a0, a1, a0
; RV64IM-NEXT: mul a2, a1, a2
; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a0, 8192
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: lui a2, 16384
-; RV64IM-NEXT: and a2, t2, a2
+; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a0, 2048
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: lui a2, 4096
+; RV64IM-NEXT: and a2, a3, a2
; RV64IM-NEXT: mul a0, a1, a0
; RV64IM-NEXT: mul a2, a1, a2
; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: sd a0, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a0, 1
-; RV64IM-NEXT: and a5, t2, a0
+; RV64IM-NEXT: and t4, a3, a0
; RV64IM-NEXT: lui a0, 2
-; RV64IM-NEXT: and a7, t2, a0
-; RV64IM-NEXT: lui a0, 16
-; RV64IM-NEXT: and t4, t2, a0
-; RV64IM-NEXT: lui a0, 32
-; RV64IM-NEXT: and a6, t2, a0
+; RV64IM-NEXT: and a7, a3, a0
+; RV64IM-NEXT: lui a0, 4
+; RV64IM-NEXT: and t5, a3, a0
+; RV64IM-NEXT: lui a0, 8
+; RV64IM-NEXT: and s0, a3, a0
; RV64IM-NEXT: lui a0, 64
-; RV64IM-NEXT: and t5, t2, a0
+; RV64IM-NEXT: and s2, a3, a0
+; RV64IM-NEXT: lui a0, 128
+; RV64IM-NEXT: and s4, a3, a0
+; RV64IM-NEXT: lui a0, 256
+; RV64IM-NEXT: and s3, a3, a0
; RV64IM-NEXT: lui a0, 512
-; RV64IM-NEXT: and t6, t2, a0
+; RV64IM-NEXT: and s6, a3, a0
; RV64IM-NEXT: lui a0, 1024
-; RV64IM-NEXT: and s0, t2, a0
-; RV64IM-NEXT: lui a0, 2048
-; RV64IM-NEXT: and s1, t2, a0
-; RV64IM-NEXT: lui a0, 4096
-; RV64IM-NEXT: and s2, t2, a0
-; RV64IM-NEXT: lui a0, 32768
-; RV64IM-NEXT: and s5, t2, a0
+; RV64IM-NEXT: and s7, a3, a0
+; RV64IM-NEXT: lui a0, 8192
+; RV64IM-NEXT: and s8, a3, a0
+; RV64IM-NEXT: lui a0, 16384
+; RV64IM-NEXT: and s9, a3, a0
+; RV64IM-NEXT: lui a5, 32768
+; RV64IM-NEXT: and a5, a3, a5
; RV64IM-NEXT: lui a0, 65536
-; RV64IM-NEXT: and s10, t2, a0
+; RV64IM-NEXT: and s10, a3, a0
; RV64IM-NEXT: lui a0, 131072
-; RV64IM-NEXT: and t3, t2, a0
+; RV64IM-NEXT: and s11, a3, a0
; RV64IM-NEXT: lui a0, 262144
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 760(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 752(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 752(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: ld a0, 992(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 984(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 984(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 976(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 976(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
+; RV64IM-NEXT: and a0, a3, a0
; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t0, a3, a0
; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 248(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: sd a0, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 768(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and ra, t2, a0
-; RV64IM-NEXT: ld a0, 776(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s11, t2, a0
-; RV64IM-NEXT: ld a0, 784(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s4, t2, a0
-; RV64IM-NEXT: ld a0, 792(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s7, t2, a0
-; RV64IM-NEXT: ld a0, 800(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s6, t2, a0
-; RV64IM-NEXT: ld a0, 808(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s8, t2, a0
-; RV64IM-NEXT: ld a0, 816(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a4, t2, a0
-; RV64IM-NEXT: ld a0, 824(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a0, t2, a0
-; RV64IM-NEXT: ld a2, 840(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a2, t2, a2
-; RV64IM-NEXT: ld a3, 832(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t1, t2, a3
-; RV64IM-NEXT: ld t0, 848(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and t0, t2, t0
-; RV64IM-NEXT: andi a3, t2, 1024
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, a5
-; RV64IM-NEXT: sd a3, 0(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, a7
-; RV64IM-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s9, a1, t4
-; RV64IM-NEXT: mul a3, a1, a6
-; RV64IM-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, t5
-; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s3, a1, t6
-; RV64IM-NEXT: mul a3, a1, s0
-; RV64IM-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, s1
-; RV64IM-NEXT: sd a3, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, s2
-; RV64IM-NEXT: sd a3, 792(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s2, a1, s5
-; RV64IM-NEXT: mul a3, a1, s10
-; RV64IM-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, t3
-; RV64IM-NEXT: sd a3, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 776(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t5, a1, a3
-; RV64IM-NEXT: ld a3, 752(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 824(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s0, a1, a3
-; RV64IM-NEXT: ld a3, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t6, a1, a3
-; RV64IM-NEXT: ld a3, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 968(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 960(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and ra, a3, a0
+; RV64IM-NEXT: ld a0, 952(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, a3, a0
+; RV64IM-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a4, a3, a4
+; RV64IM-NEXT: ld a0, 936(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s5, a3, a0
+; RV64IM-NEXT: ld a0, 928(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t2, a3, a0
+; RV64IM-NEXT: ld s1, 920(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s1, a3, s1
+; RV64IM-NEXT: ld a0, 912(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t6, a3, a0
+; RV64IM-NEXT: ld a2, 904(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a2, a3, a2
+; RV64IM-NEXT: ld a0, 896(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a0, a3, a0
+; RV64IM-NEXT: ld t1, 888(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t3, a3, t1
+; RV64IM-NEXT: ld t1, 880(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t1, a3, t1
+; RV64IM-NEXT: andi a3, a3, 1
+; RV64IM-NEXT: seqz a3, a3
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: mul t4, a1, t4
+; RV64IM-NEXT: sd t4, 24(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, a7
+; RV64IM-NEXT: sd a7, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, t5
+; RV64IM-NEXT: sd a7, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s0
+; RV64IM-NEXT: sd a7, 912(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s2
+; RV64IM-NEXT: sd a7, 16(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s4
+; RV64IM-NEXT: sd a7, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s3
+; RV64IM-NEXT: sd a7, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s6
+; RV64IM-NEXT: sd a7, 888(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s7
+; RV64IM-NEXT: sd a7, 968(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s8
+; RV64IM-NEXT: sd a7, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a1, s9
+; RV64IM-NEXT: sd a7, 40(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a1, a5
+; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a1, s10
+; RV64IM-NEXT: sd a5, 880(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a5, a1, s11
+; RV64IM-NEXT: sd a5, 936(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a5, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a5, a1, a5
+; RV64IM-NEXT: sd a5, 992(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t5, a3, a1
; RV64IM-NEXT: ld a3, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 752(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 800(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul s6, a1, a3
+; RV64IM-NEXT: ld a3, 984(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s8, a1, a3
+; RV64IM-NEXT: ld a3, 976(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s7, a1, a3
; RV64IM-NEXT: ld a3, 136(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 840(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 136(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t4, a1, a3
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 144(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t3, a1, a3
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 256(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s5, a1, a3
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 896(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 168(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 960(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 984(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 760(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul s3, a1, a3
; RV64IM-NEXT: ld a3, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s2, a1, a3
+; RV64IM-NEXT: mul a3, a1, t0
+; RV64IM-NEXT: sd a3, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 208(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 816(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 224(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 200(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 848(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 920(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 88(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 952(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a1, a3
+; RV64IM-NEXT: sd a3, 976(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a3, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t2, a1, a3
-; RV64IM-NEXT: ld a3, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a1, a3
+; RV64IM-NEXT: mul s0, a1, a3
; RV64IM-NEXT: ld a3, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s1, a1, a3
-; RV64IM-NEXT: ld a3, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul t4, a1, a3
+; RV64IM-NEXT: ld a3, 48(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a3, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 288(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a3, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a3, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 32(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a3, a1, a3
-; RV64IM-NEXT: sd a3, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 208(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a3, a1, ra
-; RV64IM-NEXT: sd a3, 784(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, s11
-; RV64IM-NEXT: sd a3, 808(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a3, a1, s4
-; RV64IM-NEXT: sd a3, 832(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s7, a1, s7
-; RV64IM-NEXT: mul s6, a1, s6
-; RV64IM-NEXT: mul s11, a1, s8
+; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a3, a1, a6
+; RV64IM-NEXT: sd a3, 288(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a3, a1, a4
-; RV64IM-NEXT: sd a3, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a3, 904(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a3, a1, s5
+; RV64IM-NEXT: sd a3, 928(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a3, a1, t2
+; RV64IM-NEXT: sd a3, 944(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul s1, a1, s1
+; RV64IM-NEXT: mul t6, a1, t6
+; RV64IM-NEXT: mul s4, a1, a2
; RV64IM-NEXT: mul a0, a1, a0
-; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a0, a1, a2
+; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a0, a1, t3
; RV64IM-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mul a0, a1, t1
-; RV64IM-NEXT: sd a0, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a0, a1, t0
-; RV64IM-NEXT: sd a0, 768(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 720(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 712(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a0, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 856(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t2, a1, a0
+; RV64IM-NEXT: ld a0, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, a0, a1
+; RV64IM-NEXT: ld a0, 816(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 800(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s5, a0, a1
+; RV64IM-NEXT: ld a0, 792(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 784(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor ra, a0, a1
+; RV64IM-NEXT: ld a0, 624(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a1, a0
+; RV64IM-NEXT: ld a0, 776(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 584(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t0, a0, a1
-; RV64IM-NEXT: ld a0, 728(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 704(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, a1, a0
-; RV64IM-NEXT: ld a0, 592(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor ra, a1, a0
-; RV64IM-NEXT: ld a0, 696(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 576(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a0, 768(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 544(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, a0, a1
+; RV64IM-NEXT: ld a0, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a3, 440(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a0, a3
+; RV64IM-NEXT: ld a0, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a4, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a0, a4
+; RV64IM-NEXT: ld a0, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a5, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a0, a5
+; RV64IM-NEXT: ld a0, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a0, a1
+; RV64IM-NEXT: ld a0, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t5, a0
+; RV64IM-NEXT: ld a0, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 688(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a2, 528(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a1, a2
-; RV64IM-NEXT: ld a2, 680(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a3, 472(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, a3
-; RV64IM-NEXT: ld a3, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a4, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: ld a4, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a5, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s8, a4, a5
-; RV64IM-NEXT: ld a4, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a5, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, a5
-; RV64IM-NEXT: ld a5, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a6, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, a6
-; RV64IM-NEXT: ld a6, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s10, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, s10
-; RV64IM-NEXT: ld s10, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s10, s10, s4
-; RV64IM-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t5, s4
-; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s9, s4, s9
-; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s3, s4, s3
-; RV64IM-NEXT: ld s4, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s4, s2
-; RV64IM-NEXT: xor t6, s0, t6
-; RV64IM-NEXT: xor t3, t4, t3
-; RV64IM-NEXT: xor a7, t2, a7
-; RV64IM-NEXT: xor t2, s7, s6
-; RV64IM-NEXT: xor t0, t0, t1
-; RV64IM-NEXT: ld t1, 640(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, ra, t1
-; RV64IM-NEXT: ld t4, 632(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a0, t4
-; RV64IM-NEXT: ld t4, 616(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, a1, t4
-; RV64IM-NEXT: ld t4, 544(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, t4
-; RV64IM-NEXT: ld t4, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, t4
-; RV64IM-NEXT: ld t4, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, s8, t4
-; RV64IM-NEXT: ld s0, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, s11
+; RV64IM-NEXT: ld s10, 24(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s6, s6, s10
+; RV64IM-NEXT: ld s11, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s11, s11, s10
+; RV64IM-NEXT: ld s10, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s9, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s10, s10, s9
+; RV64IM-NEXT: xor s7, s8, s7
+; RV64IM-NEXT: xor s2, s3, s2
+; RV64IM-NEXT: xor t4, s0, t4
+; RV64IM-NEXT: xor t6, s1, t6
+; RV64IM-NEXT: xor t2, t2, t3
+; RV64IM-NEXT: ld t3, 824(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, s5, t3
+; RV64IM-NEXT: ld s0, 808(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, ra, s0
+; RV64IM-NEXT: ld s1, 696(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, s1
+; RV64IM-NEXT: ld s1, 664(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, s1
+; RV64IM-NEXT: ld s1, 600(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, s1
+; RV64IM-NEXT: ld s1, 480(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, s1
+; RV64IM-NEXT: ld s1, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, s1
+; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, s1
+; RV64IM-NEXT: ld s1, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, s1
+; RV64IM-NEXT: xor a0, t5, a0
+; RV64IM-NEXT: ld t5, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, t5
+; RV64IM-NEXT: ld t5, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, t5
+; RV64IM-NEXT: ld t5, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, s6, t5
+; RV64IM-NEXT: ld s1, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, s11, s1
+; RV64IM-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s3, s10, s3
+; RV64IM-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s5, s7, s5
+; RV64IM-NEXT: ld s6, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s2, s2, s6
+; RV64IM-NEXT: ld s6, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t4, s6
+; RV64IM-NEXT: xor t6, t6, s4
+; RV64IM-NEXT: xor t2, t2, t3
+; RV64IM-NEXT: ld t3, 832(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, s0, t3
+; RV64IM-NEXT: ld s0, 720(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, s0
+; RV64IM-NEXT: ld s0, 712(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, s0
+; RV64IM-NEXT: ld s0, 672(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, s0
+; RV64IM-NEXT: ld s0, 512(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, s0
+; RV64IM-NEXT: ld s0, 504(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a4, a4, s0
-; RV64IM-NEXT: ld s0, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s0, 488(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a5, a5, s0
-; RV64IM-NEXT: xor a6, a6, s10
-; RV64IM-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t5, s0
-; RV64IM-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s9, s0
-; RV64IM-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s3, s3, s4
-; RV64IM-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s2, s4
-; RV64IM-NEXT: ld s4, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, t6, s4
-; RV64IM-NEXT: xor t3, t3, s5
-; RV64IM-NEXT: xor a7, a7, s1
-; RV64IM-NEXT: xor t2, t2, s11
-; RV64IM-NEXT: xor t0, t0, t1
-; RV64IM-NEXT: ld t1, 664(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a0, t1
-; RV64IM-NEXT: ld t1, 648(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, a1, t1
-; RV64IM-NEXT: ld t1, 624(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, t1
-; RV64IM-NEXT: ld t1, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, t1
-; RV64IM-NEXT: ld t1, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t4, t1
-; RV64IM-NEXT: ld t4, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, t4
-; RV64IM-NEXT: ld t4, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, t4
-; RV64IM-NEXT: xor a6, a6, t5
-; RV64IM-NEXT: ld t4, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, s0, t4
-; RV64IM-NEXT: ld t5, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, s3, t5
-; RV64IM-NEXT: ld s0, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s0, 472(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, s0
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t5, a2
+; RV64IM-NEXT: ld t5, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, s1, t5
+; RV64IM-NEXT: ld s0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s3, s0
+; RV64IM-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, s5, s1
+; RV64IM-NEXT: ld s3, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s2, s2, s3
+; RV64IM-NEXT: ld s3, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t4, s3
+; RV64IM-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, t6, s3
+; RV64IM-NEXT: xor t2, t2, t3
+; RV64IM-NEXT: ld t3, 752(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, t3
+; RV64IM-NEXT: ld t3, 736(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, t3
+; RV64IM-NEXT: ld t3, 704(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, t3
+; RV64IM-NEXT: ld t3, 560(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t3
+; RV64IM-NEXT: ld t3, 536(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, t3
+; RV64IM-NEXT: ld t3, 520(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, t3
+; RV64IM-NEXT: ld t3, 496(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, t3
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 912(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 888(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t5, a2
+; RV64IM-NEXT: ld t3, 880(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, s0, t3
+; RV64IM-NEXT: ld t5, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, s1, t5
+; RV64IM-NEXT: ld s0, 224(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor s0, s2, s0
-; RV64IM-NEXT: ld s1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t4, s1
+; RV64IM-NEXT: ld s1, 200(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t6, t6, s1
-; RV64IM-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t3, t3, s1
-; RV64IM-NEXT: ld s1, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, s1
-; RV64IM-NEXT: ld s1, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, s1
-; RV64IM-NEXT: xor a0, t0, a0
-; RV64IM-NEXT: ld t0, 672(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, a1, t0
-; RV64IM-NEXT: ld t0, 656(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, t0
-; RV64IM-NEXT: ld t0, 488(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, t0
-; RV64IM-NEXT: ld t0, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, t2, a7
+; RV64IM-NEXT: ld t2, 760(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, t2
+; RV64IM-NEXT: ld t2, 728(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, t2
+; RV64IM-NEXT: ld t2, 592(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t2
+; RV64IM-NEXT: ld t2, 568(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, t2
+; RV64IM-NEXT: ld t2, 552(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, t2
+; RV64IM-NEXT: ld t2, 528(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, t2
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 968(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 936(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t3, a2
+; RV64IM-NEXT: ld t2, 896(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t2, t5, t2
+; RV64IM-NEXT: ld t3, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, s0, t3
+; RV64IM-NEXT: ld t5, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t4, t5
+; RV64IM-NEXT: ld t5, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t6, t5
+; RV64IM-NEXT: xor a7, a7, t0
+; RV64IM-NEXT: ld t0, 744(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t0, t1, t0
-; RV64IM-NEXT: ld t1, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t1, 640(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t1
+; RV64IM-NEXT: ld t1, 616(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a4, a4, t1
-; RV64IM-NEXT: ld t1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t1, 576(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a5, a5, t1
-; RV64IM-NEXT: xor a6, a6, t4
-; RV64IM-NEXT: ld t1, 792(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t5, t1
-; RV64IM-NEXT: ld t4, 776(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, s0, t4
-; RV64IM-NEXT: ld t5, 752(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t6, t5
-; RV64IM-NEXT: ld t6, 216(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t3, t3, t6
-; RV64IM-NEXT: ld t6, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t6
-; RV64IM-NEXT: ld t6, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, t6
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 568(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 992(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: ld a2, 536(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: ld a3, 504(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, t0, a3
-; RV64IM-NEXT: ld t0, 480(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 960(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t2, a2
+; RV64IM-NEXT: ld t1, 920(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t3, t1
+; RV64IM-NEXT: ld t2, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t2, t4, t2
+; RV64IM-NEXT: xor a7, a7, t0
+; RV64IM-NEXT: ld t0, 688(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t0
+; RV64IM-NEXT: ld t0, 648(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a4, a4, t0
-; RV64IM-NEXT: ld t0, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t0, 608(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a5, a5, t0
-; RV64IM-NEXT: xor a6, a6, t1
-; RV64IM-NEXT: ld t0, 824(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t4, t0
-; RV64IM-NEXT: ld t1, 800(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t5, t1
-; RV64IM-NEXT: ld t4, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t3, t3, t4
-; RV64IM-NEXT: ld t4, 240(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t4
-; RV64IM-NEXT: ld t4, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, t4
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 584(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 984(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: ld a2, 552(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: ld a3, 520(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a4, a3
-; RV64IM-NEXT: ld a4, 496(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a5, a4
-; RV64IM-NEXT: xor a5, a6, t0
-; RV64IM-NEXT: ld a6, 840(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, t1, a6
-; RV64IM-NEXT: ld t0, 816(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t3, t0
-; RV64IM-NEXT: ld t1, 784(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t1
-; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t2, t1
+; RV64IM-NEXT: ld a2, 952(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t1, a2
+; RV64IM-NEXT: ld t0, 904(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t2, t0
+; RV64IM-NEXT: xor a3, a7, a3
+; RV64IM-NEXT: ld a7, 680(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, a7
+; RV64IM-NEXT: ld a7, 632(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, a7
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 608(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 976(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: ld a2, 560(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: ld a3, 512(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a4, a3
-; RV64IM-NEXT: xor a4, a5, a6
-; RV64IM-NEXT: ld a5, 848(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, t0, a5
-; RV64IM-NEXT: ld a6, 808(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a7, a6
-; RV64IM-NEXT: ld a7, 768(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, t1, a7
+; RV64IM-NEXT: ld a2, 928(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t0, a2
+; RV64IM-NEXT: xor a3, a3, a4
+; RV64IM-NEXT: ld a4, 656(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a5, a4
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 600(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 944(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: xor a4, a4, a5
-; RV64IM-NEXT: ld a2, 832(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a6, a2
+; RV64IM-NEXT: xor a3, a3, a4
; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: xor a2, a4, a2
-; RV64IM-NEXT: xor s1, a0, a3
-; RV64IM-NEXT: xor s2, a2, a7
-; RV64IM-NEXT: ld a0, 736(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, a3, a6
+; RV64IM-NEXT: xor s2, a0, t5
+; RV64IM-NEXT: ld a0, 864(sp) # 8-byte Folded Reload
; RV64IM-NEXT: sd s1, 0(a0)
; RV64IM-NEXT: sd s2, 8(a0)
; RV64IM-NEXT: mv a0, s1
; RV64IM-NEXT: mv a1, s2
; RV64IM-NEXT: call vector_use
-; RV64IM-NEXT: ld a0, 744(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a0, 872(sp) # 8-byte Folded Reload
; RV64IM-NEXT: sd s1, 0(a0)
; RV64IM-NEXT: sd s2, 8(a0)
-; RV64IM-NEXT: ld ra, 952(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s0, 944(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s1, 936(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s2, 928(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s3, 920(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s4, 912(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s5, 904(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s6, 896(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s7, 888(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s8, 880(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s9, 872(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s10, 864(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s11, 856(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: addi sp, sp, 960
+; RV64IM-NEXT: ld ra, 1096(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s0, 1088(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 1080(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s2, 1072(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s3, 1064(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s4, 1056(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s5, 1048(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s6, 1040(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s7, 1032(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s8, 1024(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s9, 1016(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s10, 1008(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 1000(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 1104
; RV64IM-NEXT: ret
;
; RV32IMZBS-LABEL: mul_use_commutative_clmul_v2i64:
; RV32IMZBS: # %bb.0:
-; RV32IMZBS-NEXT: addi sp, sp, -816
-; RV32IMZBS-NEXT: sw ra, 812(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s0, 808(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s1, 804(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s2, 800(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s3, 796(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s4, 792(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s5, 788(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s6, 784(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s7, 780(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s8, 776(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s9, 772(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s10, 768(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s11, 764(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw a3, 692(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw a2, 688(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw ra, 0(a1)
+; RV32IMZBS-NEXT: addi sp, sp, -800
+; RV32IMZBS-NEXT: sw ra, 796(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s0, 792(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s1, 788(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s2, 784(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s3, 780(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s4, 776(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s5, 772(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s6, 768(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s7, 764(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s8, 760(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s9, 756(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s10, 752(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s11, 748(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a3, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a2, 712(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a4, 0(a1)
; RV32IMZBS-NEXT: lw a5, 4(a1)
-; RV32IMZBS-NEXT: lw a2, 8(a1)
-; RV32IMZBS-NEXT: sw a2, 724(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 12(a1)
-; RV32IMZBS-NEXT: sw a1, 492(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 0(a0)
-; RV32IMZBS-NEXT: lw a2, 4(a0)
-; RV32IMZBS-NEXT: lw s10, 8(a0)
-; RV32IMZBS-NEXT: lw a0, 12(a0)
-; RV32IMZBS-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s4, 8(a1)
+; RV32IMZBS-NEXT: lw s9, 12(a1)
+; RV32IMZBS-NEXT: lw a7, 0(a0)
+; RV32IMZBS-NEXT: lw t0, 4(a0)
+; RV32IMZBS-NEXT: lw t1, 8(a0)
+; RV32IMZBS-NEXT: lw s2, 12(a0)
; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: bseti a7, zero, 11
-; RV32IMZBS-NEXT: lui s6, 1
-; RV32IMZBS-NEXT: addi t0, a0, -256
-; RV32IMZBS-NEXT: sw t0, 732(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli a0, a1, 8
-; RV32IMZBS-NEXT: srli a6, a1, 24
-; RV32IMZBS-NEXT: and a0, a0, t0
-; RV32IMZBS-NEXT: or a0, a0, a6
-; RV32IMZBS-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 2
-; RV32IMZBS-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 1
+; RV32IMZBS-NEXT: addi s8, a0, -256
+; RV32IMZBS-NEXT: srli t2, a7, 8
+; RV32IMZBS-NEXT: srli t3, a7, 24
+; RV32IMZBS-NEXT: and a3, a7, s8
+; RV32IMZBS-NEXT: slli s10, a7, 24
+; RV32IMZBS-NEXT: srli t4, a4, 8
+; RV32IMZBS-NEXT: srli s0, a4, 24
+; RV32IMZBS-NEXT: and t5, a4, s8
+; RV32IMZBS-NEXT: slli t6, a4, 24
+; RV32IMZBS-NEXT: slli a6, t0, 1
+; RV32IMZBS-NEXT: andi s5, a4, 2
+; RV32IMZBS-NEXT: slli s1, t0, 2
+; RV32IMZBS-NEXT: andi s3, a4, 4
+; RV32IMZBS-NEXT: slli a1, t0, 3
+; RV32IMZBS-NEXT: andi a2, a4, 8
+; RV32IMZBS-NEXT: andi a0, a4, 16
+; RV32IMZBS-NEXT: slli s6, a7, 1
+; RV32IMZBS-NEXT: srli s7, t1, 8
+; RV32IMZBS-NEXT: and t2, t2, s8
+; RV32IMZBS-NEXT: or t2, t2, t3
+; RV32IMZBS-NEXT: srli t3, t1, 24
+; RV32IMZBS-NEXT: and t4, t4, s8
+; RV32IMZBS-NEXT: or t4, t4, s0
+; RV32IMZBS-NEXT: srli s0, s4, 8
+; RV32IMZBS-NEXT: slli t5, t5, 8
+; RV32IMZBS-NEXT: or t5, t6, t5
+; RV32IMZBS-NEXT: srli t6, s4, 24
+; RV32IMZBS-NEXT: and s7, s7, s8
+; RV32IMZBS-NEXT: or t3, s7, t3
+; RV32IMZBS-NEXT: and s7, s4, s8
+; RV32IMZBS-NEXT: and s0, s0, s8
+; RV32IMZBS-NEXT: sw s8, 708(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or t6, s0, t6
+; RV32IMZBS-NEXT: slli s0, s4, 24
+; RV32IMZBS-NEXT: slli s7, s7, 8
+; RV32IMZBS-NEXT: or s0, s0, s7
+; RV32IMZBS-NEXT: andi s7, a5, 2
+; RV32IMZBS-NEXT: slli a3, a3, 8
+; RV32IMZBS-NEXT: or a3, s10, a3
+; RV32IMZBS-NEXT: or a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a3, t1, s8
+; RV32IMZBS-NEXT: or t2, t5, t4
+; RV32IMZBS-NEXT: sw t2, 696(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s8, t1, 24
+; RV32IMZBS-NEXT: slli a3, a3, 8
+; RV32IMZBS-NEXT: or a3, s8, a3
+; RV32IMZBS-NEXT: or a3, a3, t3
+; RV32IMZBS-NEXT: sw a3, 700(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, a7, 2
+; RV32IMZBS-NEXT: or t2, s0, t6
+; RV32IMZBS-NEXT: sw t2, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi t2, a5, 4
+; RV32IMZBS-NEXT: seqz t3, s5
+; RV32IMZBS-NEXT: seqz t4, s7
+; RV32IMZBS-NEXT: addi t3, t3, -1
+; RV32IMZBS-NEXT: addi t4, t4, -1
+; RV32IMZBS-NEXT: and t5, t3, a6
+; RV32IMZBS-NEXT: and a6, t4, s6
+; RV32IMZBS-NEXT: and t6, t3, s6
+; RV32IMZBS-NEXT: slli t3, a7, 3
+; RV32IMZBS-NEXT: seqz t4, s3
+; RV32IMZBS-NEXT: seqz t2, t2
+; RV32IMZBS-NEXT: addi t4, t4, -1
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: and s3, t4, s1
+; RV32IMZBS-NEXT: and s0, t2, a3
+; RV32IMZBS-NEXT: and t4, t4, a3
+; RV32IMZBS-NEXT: andi a3, a5, 8
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: seqz a3, a3
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and s6, a2, a1
+; RV32IMZBS-NEXT: and s5, a3, t3
+; RV32IMZBS-NEXT: and t3, a2, t3
+; RV32IMZBS-NEXT: andi a1, a5, 16
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 4
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 4
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 4
-; RV32IMZBS-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 8
-; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 16
-; RV32IMZBS-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 32
+; RV32IMZBS-NEXT: andi a0, a4, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a5, 32
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 5
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 5
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, a4, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a5, 64
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 6
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 6
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 668(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, a4, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a5, 128
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 7
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 564(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 7
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 64
-; RV32IMZBS-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 128
-; RV32IMZBS-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 256
-; RV32IMZBS-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 512
-; RV32IMZBS-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 1024
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw a7, 728(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, a5, a7
-; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, a5, s6
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s4, 2
-; RV32IMZBS-NEXT: and a0, a5, s4
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s3, 4
-; RV32IMZBS-NEXT: and a0, a5, s3
-; RV32IMZBS-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s2, 8
-; RV32IMZBS-NEXT: and a0, a5, s2
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t6, 16
-; RV32IMZBS-NEXT: and a0, a5, t6
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s1, 32
-; RV32IMZBS-NEXT: and a0, a5, s1
-; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t5, 64
-; RV32IMZBS-NEXT: and a0, a5, t5
-; RV32IMZBS-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t4, 128
-; RV32IMZBS-NEXT: and a0, a5, t4
-; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t3, 256
-; RV32IMZBS-NEXT: and a0, a5, t3
-; RV32IMZBS-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t1, 512
-; RV32IMZBS-NEXT: and a0, a5, t1
-; RV32IMZBS-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a6, 1024
-; RV32IMZBS-NEXT: and a0, a5, a6
-; RV32IMZBS-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 2048
-; RV32IMZBS-NEXT: and a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 4096
-; RV32IMZBS-NEXT: and a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 8192
-; RV32IMZBS-NEXT: and a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s0, 16384
-; RV32IMZBS-NEXT: and a0, a5, s0
-; RV32IMZBS-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t2, 32768
-; RV32IMZBS-NEXT: and a0, a5, t2
-; RV32IMZBS-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 65536
-; RV32IMZBS-NEXT: and a0, a5, t0
-; RV32IMZBS-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s11, 131072
-; RV32IMZBS-NEXT: and a0, a5, s11
-; RV32IMZBS-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a4, 262144
-; RV32IMZBS-NEXT: and a0, a5, a4
-; RV32IMZBS-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and a5, a5, a0
-; RV32IMZBS-NEXT: sw a5, 356(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 2
-; RV32IMZBS-NEXT: sw a3, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 1
-; RV32IMZBS-NEXT: sw a3, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 4
-; RV32IMZBS-NEXT: sw a3, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 8
-; RV32IMZBS-NEXT: sw a3, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 16
-; RV32IMZBS-NEXT: sw a3, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 32
-; RV32IMZBS-NEXT: sw a3, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 64
-; RV32IMZBS-NEXT: sw a3, 348(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s9, ra, 128
-; RV32IMZBS-NEXT: sw s9, 412(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, ra, 256
-; RV32IMZBS-NEXT: sw a3, 352(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s8, ra, 512
-; RV32IMZBS-NEXT: sw s8, 416(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s7, ra, 1024
-; RV32IMZBS-NEXT: sw s7, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s5, ra, a7
-; RV32IMZBS-NEXT: sw s5, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s6, ra, s6
-; RV32IMZBS-NEXT: and s4, ra, s4
-; RV32IMZBS-NEXT: sw s4, 428(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s3, ra, s3
-; RV32IMZBS-NEXT: sw s3, 432(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s2, ra, s2
-; RV32IMZBS-NEXT: sw s2, 436(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t6, ra, t6
-; RV32IMZBS-NEXT: sw t6, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s1, ra, s1
-; RV32IMZBS-NEXT: and t5, ra, t5
-; RV32IMZBS-NEXT: sw t5, 444(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t4, ra, t4
-; RV32IMZBS-NEXT: sw t4, 448(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t3, ra, t3
-; RV32IMZBS-NEXT: sw t3, 452(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t1, ra, t1
-; RV32IMZBS-NEXT: sw t1, 456(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a6, ra, a6
-; RV32IMZBS-NEXT: sw a6, 460(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 2048
-; RV32IMZBS-NEXT: and a7, ra, a3
-; RV32IMZBS-NEXT: lui a3, 4096
-; RV32IMZBS-NEXT: and a5, ra, a3
-; RV32IMZBS-NEXT: sw a5, 464(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 8192
-; RV32IMZBS-NEXT: and a3, ra, a3
-; RV32IMZBS-NEXT: sw a3, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s0, ra, s0
-; RV32IMZBS-NEXT: and t2, ra, t2
-; RV32IMZBS-NEXT: and t0, ra, t0
-; RV32IMZBS-NEXT: and s11, ra, s11
-; RV32IMZBS-NEXT: sw ra, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, ra, a4
-; RV32IMZBS-NEXT: sw a4, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, ra, a0
-; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw ra, 348(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, ra
-; RV32IMZBS-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s9
-; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s9, 352(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, s9
-; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s8
-; RV32IMZBS-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s7
-; RV32IMZBS-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s5
-; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s6
-; RV32IMZBS-NEXT: mv s5, s6
-; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s4
-; RV32IMZBS-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s3
-; RV32IMZBS-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s2
-; RV32IMZBS-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t6
-; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s1
-; RV32IMZBS-NEXT: mv t6, s1
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t5
+; RV32IMZBS-NEXT: andi a0, a4, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a5, 256
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 8
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 8
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t4
-; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t3
-; RV32IMZBS-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t1
-; RV32IMZBS-NEXT: sw a0, 676(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, a6
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, a7
-; RV32IMZBS-NEXT: mv a6, a7
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, a5
-; RV32IMZBS-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, a3
-; RV32IMZBS-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s0
-; RV32IMZBS-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t2
-; RV32IMZBS-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t0
-; RV32IMZBS-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a2, s11
-; RV32IMZBS-NEXT: mv t1, s11
-; RV32IMZBS-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a5, a2, a0
-; RV32IMZBS-NEXT: mul a0, a2, a4
-; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t3, a1, a0
-; RV32IMZBS-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t4, a1, a0
-; RV32IMZBS-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s1, a1, a0
-; RV32IMZBS-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s3, a1, a0
-; RV32IMZBS-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s4, a1, a0
-; RV32IMZBS-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s6, a1, a0
-; RV32IMZBS-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, a4, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a5, 512
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 9
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 9
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, a4, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, a5, 1024
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 10
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 10
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a0, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 11
+; RV32IMZBS-NEXT: addi a2, a1, -1
+; RV32IMZBS-NEXT: not a1, a5
+; RV32IMZBS-NEXT: bexti a3, a1, 11
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 11
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 488(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 11
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 508(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 12
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 12
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 12
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 472(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 12
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 484(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 13
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 13
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 13
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 13
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 544(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 572(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 14
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 14
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 14
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 604(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 14
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 15
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 15
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 15
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 16
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 16
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 16
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 436(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 16
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 440(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 456(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 17
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 17
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 17
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 424(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 17
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 432(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 452(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 18
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 18
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 18
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 460(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 18
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 464(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 492(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 19
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 19
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 19
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 520(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 19
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 528(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 560(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 20
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 20
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 20
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 568(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 20
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 576(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 584(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 21
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 21
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 21
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 592(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 21
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 596(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 612(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 22
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 22
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 22
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 400(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 22
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 404(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 412(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 23
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 23
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 23
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 392(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 23
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 396(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 408(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 24
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 24
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 24
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 416(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a3, a3, s10
+; RV32IMZBS-NEXT: sw a3, 420(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, s10
+; RV32IMZBS-NEXT: sw a2, 428(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 25
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 25
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 25
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 444(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 25
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 448(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 468(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 26
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 26
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 26
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 476(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 26
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 480(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 496(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 27
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 27
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 27
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 500(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 27
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 504(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 512(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 28
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 28
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 28
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 532(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 28
+; RV32IMZBS-NEXT: and a3, a3, t2
+; RV32IMZBS-NEXT: sw a3, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a0, 29
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a1, 29
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli t2, t0, 29
+; RV32IMZBS-NEXT: and t2, a2, t2
+; RV32IMZBS-NEXT: sw t2, 368(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 29
+; RV32IMZBS-NEXT: and s10, a3, t2
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 384(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: bexti a1, a1, 30
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 30
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 364(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 30
+; RV32IMZBS-NEXT: and t2, a1, a2
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, a4, 1
+; RV32IMZBS-NEXT: srli a1, a4, 31
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a2, a5, 1
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a3, t0, 31
+; RV32IMZBS-NEXT: and a4, a0, t0
+; RV32IMZBS-NEXT: sw a4, 344(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a7
+; RV32IMZBS-NEXT: sw a2, 360(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and t0, a0, a7
+; RV32IMZBS-NEXT: slli a7, a7, 31
+; RV32IMZBS-NEXT: srli a5, a5, 31
+; RV32IMZBS-NEXT: seqz a0, a1
+; RV32IMZBS-NEXT: seqz a1, a5
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a3, a0, a3
+; RV32IMZBS-NEXT: sw a3, 372(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a7
+; RV32IMZBS-NEXT: sw a1, 380(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a7
+; RV32IMZBS-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 2
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 1
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 332(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t1, 1
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 348(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a7, a0, a2
+; RV32IMZBS-NEXT: andi a0, s4, 4
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 4
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 2
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 312(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t1, 2
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 320(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 8
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 8
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 3
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 288(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t1, 3
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 296(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 324(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 16
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 16
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 4
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 264(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t1, 4
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 276(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 32
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 5
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 244(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t1, 5
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 256(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 280(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 64
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 6
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 308(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t1, 6
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 316(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 128
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 7
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 208(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t1, 7
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 256
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 8
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 184(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t1, 8
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 228(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 512
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 9
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 240(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t1, 9
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s4, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 1024
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, s2, 10
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 328(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, t1, 10
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 336(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a1, s4
+; RV32IMZBS-NEXT: bexti a0, a1, 11
+; RV32IMZBS-NEXT: addi a2, a0, -1
+; RV32IMZBS-NEXT: not a0, s9
+; RV32IMZBS-NEXT: bexti a3, a0, 11
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 11
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 140(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 11
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 144(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 152(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 12
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 12
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 12
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 112(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 12
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 124(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 148(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 13
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 13
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 13
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 172(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 13
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 180(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 196(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 14
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 14
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 14
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 252(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 14
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 260(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 268(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 15
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 15
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 284(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 15
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 292(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 300(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 16
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 16
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 16
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 68(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 16
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 72(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 84(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 17
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 17
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 17
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 60(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 17
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 64(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 76(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 18
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 18
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 18
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 92(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 18
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 100(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 120(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 19
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 19
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 19
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 168(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 19
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 176(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 188(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 20
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 20
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 20
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 200(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 20
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 204(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 212(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 21
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 21
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 21
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 220(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 21
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 224(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 232(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 22
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 22
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 22
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 32(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 22
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 36(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 44(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 23
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 23
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 23
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 24(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 23
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 28(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 24
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 24
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 24
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 48(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a3, a3, s8
+; RV32IMZBS-NEXT: sw a3, 52(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, s8
+; RV32IMZBS-NEXT: sw a2, 56(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 25
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 25
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 25
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 80(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 25
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 88(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 96(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 26
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 26
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 26
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 104(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 26
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 108(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 116(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 27
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 27
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 27
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 128(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 27
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 132(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 136(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 28
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 28
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 28
+; RV32IMZBS-NEXT: and a5, a2, a5
+; RV32IMZBS-NEXT: sw a5, 160(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, t1, 28
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 156(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 164(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 29
+; RV32IMZBS-NEXT: addi a5, a2, -1
+; RV32IMZBS-NEXT: bexti a2, a0, 29
+; RV32IMZBS-NEXT: addi a4, a2, -1
+; RV32IMZBS-NEXT: slli a2, s2, 29
+; RV32IMZBS-NEXT: and s11, a5, a2
+; RV32IMZBS-NEXT: slli a3, t1, 29
+; RV32IMZBS-NEXT: and a4, a4, a3
+; RV32IMZBS-NEXT: and s8, a5, a3
+; RV32IMZBS-NEXT: bexti a1, a1, 30
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi a2, a0, -1
+; RV32IMZBS-NEXT: slli a3, s2, 30
+; RV32IMZBS-NEXT: and s7, a1, a3
+; RV32IMZBS-NEXT: slli a0, t1, 30
+; RV32IMZBS-NEXT: and a2, a2, a0
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a1, s4, 1
+; RV32IMZBS-NEXT: srli s4, s4, 31
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: andi ra, s9, 1
+; RV32IMZBS-NEXT: seqz ra, ra
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi ra, ra, -1
+; RV32IMZBS-NEXT: slli s1, s2, 31
+; RV32IMZBS-NEXT: and s2, a1, s2
+; RV32IMZBS-NEXT: and ra, ra, t1
+; RV32IMZBS-NEXT: and a3, a1, t1
+; RV32IMZBS-NEXT: slli a1, t1, 31
+; RV32IMZBS-NEXT: srli t1, s9, 31
+; RV32IMZBS-NEXT: seqz s4, s4
+; RV32IMZBS-NEXT: seqz t1, t1
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: and a0, s4, s1
+; RV32IMZBS-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, t1, a1
+; RV32IMZBS-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, s4, a1
+; RV32IMZBS-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 344(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, t5
+; RV32IMZBS-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a0, s3, s6
+; RV32IMZBS-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s7, a1, a0
-; RV32IMZBS-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s8, a1, a0
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a7, a1, a0
-; RV32IMZBS-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s11, a1, a0
-; RV32IMZBS-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
+; RV32IMZBS-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 388(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 380(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 368(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 364(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 360(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
+; RV32IMZBS-NEXT: xor a0, a0, a6
+; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a0, s0, s5
; RV32IMZBS-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, ra
-; RV32IMZBS-NEXT: sw a0, 316(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s9
-; RV32IMZBS-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s5
-; RV32IMZBS-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t6
-; RV32IMZBS-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a6
-; RV32IMZBS-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s0
-; RV32IMZBS-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t2
-; RV32IMZBS-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t0
-; RV32IMZBS-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t1
-; RV32IMZBS-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a4
-; RV32IMZBS-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 732(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, a1, a2
-; RV32IMZBS-NEXT: slli a1, a1, 24
-; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli a0, a4, 8
-; RV32IMZBS-NEXT: and a0, a0, a2
-; RV32IMZBS-NEXT: srli a1, a4, 24
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, a4, a2
-; RV32IMZBS-NEXT: slli a4, a4, 24
-; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a0, a4, a0
-; RV32IMZBS-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 628(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a3, a3, a5
-; RV32IMZBS-NEXT: sw a3, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t4, t3
-; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t5, s1
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, s2, s3
-; RV32IMZBS-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, s4, s6
-; RV32IMZBS-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, s7, s8
-; RV32IMZBS-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, a7, s11
-; RV32IMZBS-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 432(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli a0, s10, 8
-; RV32IMZBS-NEXT: and a0, a0, a2
-; RV32IMZBS-NEXT: srli a1, s10, 24
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s6, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: andi a0, s6, 2
-; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 1
-; RV32IMZBS-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 4
-; RV32IMZBS-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 8
-; RV32IMZBS-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 16
-; RV32IMZBS-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 32
-; RV32IMZBS-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 64
-; RV32IMZBS-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 128
-; RV32IMZBS-NEXT: sw a0, 264(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 256
-; RV32IMZBS-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 512
-; RV32IMZBS-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s6, 1024
-; RV32IMZBS-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw t3, 728(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, s6, t3
-; RV32IMZBS-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 1
-; RV32IMZBS-NEXT: and a0, s6, a3
-; RV32IMZBS-NEXT: sw a0, 244(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t1, 2
-; RV32IMZBS-NEXT: and a0, s6, t1
-; RV32IMZBS-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s0, 4
-; RV32IMZBS-NEXT: and a0, s6, s0
-; RV32IMZBS-NEXT: sw a0, 232(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t4, 8
-; RV32IMZBS-NEXT: and a0, s6, t4
-; RV32IMZBS-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a5, 16
-; RV32IMZBS-NEXT: and a0, s6, a5
-; RV32IMZBS-NEXT: sw a0, 188(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t5, 32
-; RV32IMZBS-NEXT: and a0, s6, t5
-; RV32IMZBS-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s7, 64
-; RV32IMZBS-NEXT: and a0, s6, s7
-; RV32IMZBS-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s1, 128
-; RV32IMZBS-NEXT: and a0, s6, s1
-; RV32IMZBS-NEXT: sw a0, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s2, 256
-; RV32IMZBS-NEXT: and a0, s6, s2
-; RV32IMZBS-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 512
-; RV32IMZBS-NEXT: and a0, s6, a2
-; RV32IMZBS-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s4, 1024
-; RV32IMZBS-NEXT: and a0, s6, s4
-; RV32IMZBS-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s5, 2048
-; RV32IMZBS-NEXT: and a0, s6, s5
-; RV32IMZBS-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a7, 4096
-; RV32IMZBS-NEXT: and a0, s6, a7
-; RV32IMZBS-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 8192
-; RV32IMZBS-NEXT: and a0, s6, a1
-; RV32IMZBS-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 16384
-; RV32IMZBS-NEXT: and a0, s6, t0
-; RV32IMZBS-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a4, 32768
-; RV32IMZBS-NEXT: and a0, s6, a4
-; RV32IMZBS-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 65536
-; RV32IMZBS-NEXT: and a0, s6, a0
-; RV32IMZBS-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 131072
-; RV32IMZBS-NEXT: and a0, s6, a0
-; RV32IMZBS-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 262144
-; RV32IMZBS-NEXT: and a0, s6, a0
-; RV32IMZBS-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and a0, s6, a0
-; RV32IMZBS-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s11, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: andi a0, s11, 2
-; RV32IMZBS-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 1
-; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 4
-; RV32IMZBS-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 8
-; RV32IMZBS-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 16
-; RV32IMZBS-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 32
-; RV32IMZBS-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 64
-; RV32IMZBS-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s3, s11, 128
-; RV32IMZBS-NEXT: sw s3, 20(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, s11, 256
-; RV32IMZBS-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi ra, s11, 512
-; RV32IMZBS-NEXT: andi a0, s11, 1024
-; RV32IMZBS-NEXT: and s8, s11, t3
-; RV32IMZBS-NEXT: and t2, s11, a3
-; RV32IMZBS-NEXT: sw t2, 24(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a6, s11, t1
-; RV32IMZBS-NEXT: sw a6, 28(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t6, s11, s0
-; RV32IMZBS-NEXT: and t3, s11, t4
-; RV32IMZBS-NEXT: and t1, s11, a5
-; RV32IMZBS-NEXT: and t4, s11, t5
-; RV32IMZBS-NEXT: and t5, s11, s7
-; RV32IMZBS-NEXT: and s0, s11, s1
-; RV32IMZBS-NEXT: and s1, s11, s2
-; RV32IMZBS-NEXT: and a5, s11, a2
-; RV32IMZBS-NEXT: sw a5, 32(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a3, s11, s4
-; RV32IMZBS-NEXT: sw a3, 36(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s2, s11, s5
-; RV32IMZBS-NEXT: and a2, s11, a7
-; RV32IMZBS-NEXT: and s9, s11, a1
-; RV32IMZBS-NEXT: and t0, s11, t0
-; RV32IMZBS-NEXT: and s7, s11, a4
-; RV32IMZBS-NEXT: lui a1, 65536
-; RV32IMZBS-NEXT: and a7, s11, a1
-; RV32IMZBS-NEXT: lui a1, 131072
-; RV32IMZBS-NEXT: and s6, s11, a1
-; RV32IMZBS-NEXT: lui a1, 262144
-; RV32IMZBS-NEXT: and s4, s11, a1
-; RV32IMZBS-NEXT: lui a1, 524288
-; RV32IMZBS-NEXT: and s5, s11, a1
-; RV32IMZBS-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 236(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 228(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s11, 12(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, s11
-; RV32IMZBS-NEXT: sw a4, 224(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 220(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 216(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 212(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, a4
-; RV32IMZBS-NEXT: sw a4, 100(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s3
-; RV32IMZBS-NEXT: sw a4, 208(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a1, s3
-; RV32IMZBS-NEXT: sw a4, 204(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, ra
-; RV32IMZBS-NEXT: sw a4, 292(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a0
-; RV32IMZBS-NEXT: sw a4, 172(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s8
-; RV32IMZBS-NEXT: sw a4, 200(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t2
-; RV32IMZBS-NEXT: sw a4, 192(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a6
-; RV32IMZBS-NEXT: sw a4, 288(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t6
-; RV32IMZBS-NEXT: mv a6, t6
-; RV32IMZBS-NEXT: sw a4, 168(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t3
-; RV32IMZBS-NEXT: mv t2, t3
-; RV32IMZBS-NEXT: sw a4, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t1
-; RV32IMZBS-NEXT: mv t3, t1
-; RV32IMZBS-NEXT: sw a4, 184(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t4
-; RV32IMZBS-NEXT: mv t1, t4
-; RV32IMZBS-NEXT: sw a4, 180(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t5
-; RV32IMZBS-NEXT: mv t4, t5
-; RV32IMZBS-NEXT: sw a4, 280(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s0
-; RV32IMZBS-NEXT: mv t6, s0
-; RV32IMZBS-NEXT: sw a4, 164(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s1
-; RV32IMZBS-NEXT: mv s0, s1
-; RV32IMZBS-NEXT: sw a4, 324(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a5
-; RV32IMZBS-NEXT: sw a4, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a3
-; RV32IMZBS-NEXT: sw a4, 160(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s2
-; RV32IMZBS-NEXT: mv s1, s2
-; RV32IMZBS-NEXT: sw a4, 156(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a2
-; RV32IMZBS-NEXT: sw a4, 276(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s9
-; RV32IMZBS-NEXT: sw a4, 152(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, t0
-; RV32IMZBS-NEXT: sw a4, 320(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s7
-; RV32IMZBS-NEXT: sw a4, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, a7
-; RV32IMZBS-NEXT: sw a4, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a1, s6
-; RV32IMZBS-NEXT: sw a4, 96(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a5, a1, s4
-; RV32IMZBS-NEXT: sw a5, 148(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mv a5, s4
-; RV32IMZBS-NEXT: mul a1, a1, s5
-; RV32IMZBS-NEXT: mv t5, s5
-; RV32IMZBS-NEXT: sw a1, 272(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 312(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 300(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 296(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 284(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 264(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 256(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 176(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a0, s10, t2
+; RV32IMZBS-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 332(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, s2, a0
+; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 312(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 288(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, a0, a1
+; RV32IMZBS-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 300(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 312(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 188(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s2, s10, a1
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 208(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s4, s10, a1
-; RV32IMZBS-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 296(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 284(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 304(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 492(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s11, s10, s11
-; RV32IMZBS-NEXT: lw a1, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a1
-; RV32IMZBS-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, s10, s3
-; RV32IMZBS-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, s10, ra
-; RV32IMZBS-NEXT: sw a1, 188(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, s10, a0
-; RV32IMZBS-NEXT: sw a1, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s8, s10, s8
-; RV32IMZBS-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul ra, s10, a0
-; RV32IMZBS-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, s10, a0
-; RV32IMZBS-NEXT: mul a1, s10, a6
-; RV32IMZBS-NEXT: sw a1, 252(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, s10, t2
-; RV32IMZBS-NEXT: sw a1, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t2, s10, t3
-; RV32IMZBS-NEXT: mul t3, s10, t1
-; RV32IMZBS-NEXT: mul t1, s10, t4
-; RV32IMZBS-NEXT: mul a1, s10, t6
-; RV32IMZBS-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, s10, s0
-; RV32IMZBS-NEXT: sw a1, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, s10, a0
-; RV32IMZBS-NEXT: sw a1, 720(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s11, s11, s7
+; RV32IMZBS-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, ra, a0
+; RV32IMZBS-NEXT: lw a0, 320(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 296(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, a0, a1
+; RV32IMZBS-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 256(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, a0, a1
+; RV32IMZBS-NEXT: lw a0, 216(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 192(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, a0, s0
+; RV32IMZBS-NEXT: lw a0, 144(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 124(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, a0, s1
+; RV32IMZBS-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, a0, a1
; RV32IMZBS-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t4, s10, a0
-; RV32IMZBS-NEXT: mul t6, s10, s1
-; RV32IMZBS-NEXT: mul a6, s10, a2
-; RV32IMZBS-NEXT: mul a0, s10, s9
-; RV32IMZBS-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, s10, t0
-; RV32IMZBS-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, s10, s7
-; RV32IMZBS-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, s10, a7
-; RV32IMZBS-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, s10, s6
-; RV32IMZBS-NEXT: mul s5, s10, a5
-; RV32IMZBS-NEXT: mul a4, s10, t5
-; RV32IMZBS-NEXT: lw a1, 732(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and s3, s10, a1
-; RV32IMZBS-NEXT: slli s10, s10, 24
-; RV32IMZBS-NEXT: slli s3, s3, 8
-; RV32IMZBS-NEXT: or a5, s10, s3
-; RV32IMZBS-NEXT: lw s9, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s6, s9, 8
-; RV32IMZBS-NEXT: and s6, s6, a1
-; RV32IMZBS-NEXT: srli s7, s9, 24
-; RV32IMZBS-NEXT: or a7, s6, s7
-; RV32IMZBS-NEXT: and s7, s9, a1
-; RV32IMZBS-NEXT: slli s9, s9, 24
-; RV32IMZBS-NEXT: slli s7, s7, 8
-; RV32IMZBS-NEXT: or t0, s9, s7
-; RV32IMZBS-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 228(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, a2, a1
-; RV32IMZBS-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 220(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, a1, a2
-; RV32IMZBS-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 212(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, a1, a2
-; RV32IMZBS-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 204(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, a1, a2
-; RV32IMZBS-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 192(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, a1, a2
-; RV32IMZBS-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 180(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s9, a1, a2
-; RV32IMZBS-NEXT: lw a1, 160(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 156(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, a1, a2
-; RV32IMZBS-NEXT: lw a1, 148(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 96(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 140(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, a2, a1
-; RV32IMZBS-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 132(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 124(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 116(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 104(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor s4, s2, s4
-; RV32IMZBS-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, a0, a1
+; RV32IMZBS-NEXT: xor s4, a4, a2
+; RV32IMZBS-NEXT: xor s5, t0, t6
+; RV32IMZBS-NEXT: xor t0, t4, t3
+; RV32IMZBS-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a0, a1
+; RV32IMZBS-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, a0, a1
+; RV32IMZBS-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a0, a1
+; RV32IMZBS-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, a0, a1
+; RV32IMZBS-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, a2, a1
-; RV32IMZBS-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 388(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 376(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 368(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 360(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 352(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 344(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 380(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 80(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: sw a1, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, s11, a1
-; RV32IMZBS-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 376(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 52(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a1, a2
-; RV32IMZBS-NEXT: xor s8, s8, ra
-; RV32IMZBS-NEXT: xor t2, t2, t3
-; RV32IMZBS-NEXT: xor t3, t4, t6
-; RV32IMZBS-NEXT: xor t4, a0, s5
-; RV32IMZBS-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 356(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, a0, a1
+; RV32IMZBS-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t6, a0, a1
-; RV32IMZBS-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a3, a7
+; RV32IMZBS-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 324(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 280(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: lw a2, 236(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 228(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a3
+; RV32IMZBS-NEXT: lw a3, 152(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 148(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a4
+; RV32IMZBS-NEXT: lw a4, 84(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 76(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a5, 44(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a6, 40(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, a6
+; RV32IMZBS-NEXT: lw a6, 8(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, s8, a6
+; RV32IMZBS-NEXT: lw s8, 344(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 4(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s8, s9
+; RV32IMZBS-NEXT: sw s8, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 508(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 564(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 488(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 548(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 484(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 460(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 472(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 472(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 416(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 436(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 460(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 372(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 424(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 664(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 400(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s8, s9
+; RV32IMZBS-NEXT: sw s8, 440(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 668(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 628(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 436(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 432(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 544(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 424(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 464(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 392(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 416(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 420(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 404(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 420(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 380(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 396(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 668(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, s8, s10
+; RV32IMZBS-NEXT: lw s8, 308(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 368(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 240(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 364(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 172(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 360(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 92(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 332(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 48(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 312(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s9, s9, s8
+; RV32IMZBS-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s11, s8
+; RV32IMZBS-NEXT: sw s8, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s8, ra, s7
+; RV32IMZBS-NEXT: lw s7, 316(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, s6, s7
+; RV32IMZBS-NEXT: sw s6, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s6, 248(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s0, s6
+; RV32IMZBS-NEXT: sw s0, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s0, 180(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s1, s0
+; RV32IMZBS-NEXT: lw s0, 100(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s2, s0
+; RV32IMZBS-NEXT: sw s0, 588(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s0, 52(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s3, s0
+; RV32IMZBS-NEXT: sw s0, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s4, s0
+; RV32IMZBS-NEXT: sw s0, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor t0, s5, t0
+; RV32IMZBS-NEXT: sw t0, 564(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 684(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t1, t0
+; RV32IMZBS-NEXT: sw t0, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t2, t0
+; RV32IMZBS-NEXT: sw t0, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t3, t0
+; RV32IMZBS-NEXT: sw t0, 572(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 492(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t4, t0
+; RV32IMZBS-NEXT: sw t0, 544(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 428(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t5, t0
+; RV32IMZBS-NEXT: sw t0, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 388(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t6, t0
+; RV32IMZBS-NEXT: sw t0, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a0, a7, a0
+; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a0
+; RV32IMZBS-NEXT: lw a0, 196(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a3, a0
+; RV32IMZBS-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 120(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a4, a0
+; RV32IMZBS-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a5, a0
+; RV32IMZBS-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a6, a0
+; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lui a4, 61681
+; RV32IMZBS-NEXT: addi a4, a4, -241
+; RV32IMZBS-NEXT: sw a4, 684(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a3, a0, 4
+; RV32IMZBS-NEXT: and a6, a0, a4
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: slli a6, a6, 4
+; RV32IMZBS-NEXT: or a0, a3, a6
+; RV32IMZBS-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a6, a0, 4
+; RV32IMZBS-NEXT: and t1, a0, a4
+; RV32IMZBS-NEXT: and a6, a6, a4
+; RV32IMZBS-NEXT: slli t1, t1, 4
+; RV32IMZBS-NEXT: or a0, a6, t1
+; RV32IMZBS-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 340(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 336(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 328(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, a1, a0
+; RV32IMZBS-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: or a0, t0, a7
-; RV32IMZBS-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a5, t5, s1
-; RV32IMZBS-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, s3, a0
-; RV32IMZBS-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, s6, a0
-; RV32IMZBS-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, s7, a0
-; RV32IMZBS-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, s9, a0
-; RV32IMZBS-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s10, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 228(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s0, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s4, a0
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s2, a0
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 316(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 680(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 432(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s11, a1, a0
+; RV32IMZBS-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, s11
-; RV32IMZBS-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, a1, a0
-; RV32IMZBS-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, a2, a0
-; RV32IMZBS-NEXT: xor ra, s8, a3
-; RV32IMZBS-NEXT: xor a0, t2, t1
-; RV32IMZBS-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t3, a6
-; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t4, a4
-; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t6, s5
-; RV32IMZBS-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 644(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 416(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 520(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 420(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t2, a0, 4
+; RV32IMZBS-NEXT: and t3, a0, a4
+; RV32IMZBS-NEXT: and t2, t2, a4
+; RV32IMZBS-NEXT: slli t3, t3, 4
+; RV32IMZBS-NEXT: or a0, t2, t3
+; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t3, a0, 4
+; RV32IMZBS-NEXT: and a0, a0, a4
+; RV32IMZBS-NEXT: and t3, t3, a4
+; RV32IMZBS-NEXT: slli a0, a0, 4
+; RV32IMZBS-NEXT: or a0, t3, a0
+; RV32IMZBS-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, a1, a0
-; RV32IMZBS-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s9, a0, a1
-; RV32IMZBS-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s1, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s1, a0
-; RV32IMZBS-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, a1, a0
-; RV32IMZBS-NEXT: xor s4, a5, a7
-; RV32IMZBS-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, t0, a0
+; RV32IMZBS-NEXT: xor a0, s10, a0
+; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 328(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 628(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s5, a1, a0
+; RV32IMZBS-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, a1, a0
; RV32IMZBS-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t5, a0
-; RV32IMZBS-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, s6, a0
-; RV32IMZBS-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, a1, a0
+; RV32IMZBS-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s9, a0
+; RV32IMZBS-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, s8, a0
+; RV32IMZBS-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s0, a0
+; RV32IMZBS-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s1, a0
+; RV32IMZBS-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, a1, a0
+; RV32IMZBS-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t6, a0, a1
-; RV32IMZBS-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 300(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a1, a0
-; RV32IMZBS-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a1, a0
+; RV32IMZBS-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a7, a1, a0
-; RV32IMZBS-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t0, a1, a0
-; RV32IMZBS-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 492(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t1, a0, a1
-; RV32IMZBS-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 416(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a0
-; RV32IMZBS-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a5, 428(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a2, a0
+; RV32IMZBS-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 464(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a5, a5, a0
-; RV32IMZBS-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 444(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a0
-; RV32IMZBS-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a3, 440(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a1, a0
+; RV32IMZBS-NEXT: lw a0, 96(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 452(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a3, a3, a0
-; RV32IMZBS-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, a0, s10
-; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, s11, a0
-; RV32IMZBS-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor ra, ra, a0
-; RV32IMZBS-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, a0, ra
+; RV32IMZBS-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 604(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, s6, a1
-; RV32IMZBS-NEXT: lw s6, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 520(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: lw a2, 476(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 508(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, s8, a2
+; RV32IMZBS-NEXT: lw s8, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s11, s8, s11
+; RV32IMZBS-NEXT: lw s8, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 680(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: lw s9, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s10, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s9, s10, s9
+; RV32IMZBS-NEXT: lw s10, 480(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 528(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, s7, s10
+; RV32IMZBS-NEXT: lw s7, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s5, s7, s5
+; RV32IMZBS-NEXT: lw s7, 284(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor s6, s6, s7
-; RV32IMZBS-NEXT: sw s6, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s6, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, s7, s6
-; RV32IMZBS-NEXT: sw s6, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s6, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, s7, s6
-; RV32IMZBS-NEXT: lw s6, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, s8, s6
-; RV32IMZBS-NEXT: xor s5, s9, s5
-; RV32IMZBS-NEXT: lw s6, 636(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s1, s6
-; RV32IMZBS-NEXT: lw s6, 628(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, s2, s6
-; RV32IMZBS-NEXT: lw s6, 620(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, s3, s6
+; RV32IMZBS-NEXT: lw s7, 200(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s2, s7
+; RV32IMZBS-NEXT: lw s7, 104(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, s7
; RV32IMZBS-NEXT: xor s0, s4, s0
-; RV32IMZBS-NEXT: sw s0, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s0, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t3, s0
-; RV32IMZBS-NEXT: lw s0, 324(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, t4, s0
-; RV32IMZBS-NEXT: lw s0, 320(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, t5, s0
-; RV32IMZBS-NEXT: xor s0, t6, t2
-; RV32IMZBS-NEXT: lw t2, 312(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, t2
-; RV32IMZBS-NEXT: lw t2, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, t2
-; RV32IMZBS-NEXT: lw t2, 304(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, t2
+; RV32IMZBS-NEXT: lw s4, 292(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s1, s4
+; RV32IMZBS-NEXT: lw s4, 204(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, s4
+; RV32IMZBS-NEXT: lw s4, 108(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, s4
+; RV32IMZBS-NEXT: xor t2, t6, t2
+; RV32IMZBS-NEXT: lw t6, 672(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, t3, t6
+; RV32IMZBS-NEXT: lw t6, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t6
+; RV32IMZBS-NEXT: lw t6, 496(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t6
; RV32IMZBS-NEXT: xor a4, t1, a4
-; RV32IMZBS-NEXT: lw t1, 436(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t1, 300(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a5, a5, t1
-; RV32IMZBS-NEXT: lw t1, 452(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t1
-; RV32IMZBS-NEXT: lw t1, 460(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t1, 212(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a6, t1
+; RV32IMZBS-NEXT: lw t1, 116(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a3, a3, t1
-; RV32IMZBS-NEXT: xor t2, s10, s11
-; RV32IMZBS-NEXT: lw t1, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, ra, t1
-; RV32IMZBS-NEXT: lw t1, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, t1
-; RV32IMZBS-NEXT: lw t1, 256(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, a1, t1
-; RV32IMZBS-NEXT: lui a1, 61681
-; RV32IMZBS-NEXT: addi s4, a1, -241
-; RV32IMZBS-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s9, a1, 4
-; RV32IMZBS-NEXT: and s10, a1, s4
-; RV32IMZBS-NEXT: and s9, s9, s4
-; RV32IMZBS-NEXT: slli s10, s10, 4
-; RV32IMZBS-NEXT: or s9, s9, s10
-; RV32IMZBS-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s10, a1, 4
-; RV32IMZBS-NEXT: and s11, a1, s4
-; RV32IMZBS-NEXT: and s10, s10, s4
-; RV32IMZBS-NEXT: slli s11, s11, 4
-; RV32IMZBS-NEXT: or s10, s10, s11
-; RV32IMZBS-NEXT: lw a1, 644(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, a1, s6
-; RV32IMZBS-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, s7, a1
-; RV32IMZBS-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, s8, a1
-; RV32IMZBS-NEXT: xor s1, s5, s1
-; RV32IMZBS-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, s2, a1
-; RV32IMZBS-NEXT: lw a1, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, s3, a1
-; RV32IMZBS-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s5, a1, 4
-; RV32IMZBS-NEXT: and s11, a1, s4
-; RV32IMZBS-NEXT: and s5, s5, s4
-; RV32IMZBS-NEXT: slli s11, s11, 4
-; RV32IMZBS-NEXT: or s5, s5, s11
-; RV32IMZBS-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s11, a1, 4
-; RV32IMZBS-NEXT: and ra, a1, s4
-; RV32IMZBS-NEXT: and s11, s11, s4
-; RV32IMZBS-NEXT: slli ra, ra, 4
-; RV32IMZBS-NEXT: or s11, s11, ra
-; RV32IMZBS-NEXT: lw a1, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, a1, t3
-; RV32IMZBS-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, t4, a1
-; RV32IMZBS-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, t5, a1
-; RV32IMZBS-NEXT: xor a6, s0, a6
-; RV32IMZBS-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, a1
-; RV32IMZBS-NEXT: lw a1, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, a1
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: lw a1, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a1
-; RV32IMZBS-NEXT: lw a1, 464(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a1
-; RV32IMZBS-NEXT: xor a5, t2, t6
-; RV32IMZBS-NEXT: lw a1, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: lw a1, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, a1
-; RV32IMZBS-NEXT: xor t2, s6, s7
-; RV32IMZBS-NEXT: lw a1, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, s8, a1
-; RV32IMZBS-NEXT: xor s0, s1, s2
-; RV32IMZBS-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s3, a1
-; RV32IMZBS-NEXT: xor t3, t3, t4
-; RV32IMZBS-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, t5, a1
-; RV32IMZBS-NEXT: xor a6, a6, a7
-; RV32IMZBS-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, t0, a1
-; RV32IMZBS-NEXT: xor a2, a4, a2
-; RV32IMZBS-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a1
-; RV32IMZBS-NEXT: xor a0, a5, a0
-; RV32IMZBS-NEXT: lw a1, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, t1, a1
-; RV32IMZBS-NEXT: xor a5, t2, t6
+; RV32IMZBS-NEXT: xor a0, ra, a0
+; RV32IMZBS-NEXT: lw t1, 592(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, t1
+; RV32IMZBS-NEXT: lw t1, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a2, t1
+; RV32IMZBS-NEXT: xor t6, s11, s8
+; RV32IMZBS-NEXT: lw a2, 596(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, s9, a2
+; RV32IMZBS-NEXT: lw a2, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s10, a2
+; RV32IMZBS-NEXT: xor s5, s5, s6
+; RV32IMZBS-NEXT: lw a2, 220(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s2, a2
+; RV32IMZBS-NEXT: lw a2, 128(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, a2
; RV32IMZBS-NEXT: xor s0, s0, s1
-; RV32IMZBS-NEXT: xor t0, t3, t4
-; RV32IMZBS-NEXT: xor a6, a6, a7
-; RV32IMZBS-NEXT: xor a3, a2, a3
-; RV32IMZBS-NEXT: xor a0, a0, a4
+; RV32IMZBS-NEXT: lw a2, 224(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, a2
+; RV32IMZBS-NEXT: lw a2, 132(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, a2
+; RV32IMZBS-NEXT: xor t2, t2, t3
+; RV32IMZBS-NEXT: lw a2, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, a2
+; RV32IMZBS-NEXT: lw a2, 512(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, a2
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a2, 232(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a6, a2
+; RV32IMZBS-NEXT: lw a2, 136(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a2
; RV32IMZBS-NEXT: lui a2, 209715
-; RV32IMZBS-NEXT: addi t5, a2, 819
-; RV32IMZBS-NEXT: srli a4, s9, 2
-; RV32IMZBS-NEXT: and a7, s9, t5
-; RV32IMZBS-NEXT: and a4, a4, t5
-; RV32IMZBS-NEXT: slli a7, a7, 2
-; RV32IMZBS-NEXT: or a4, a4, a7
-; RV32IMZBS-NEXT: srli a7, s10, 2
-; RV32IMZBS-NEXT: and t1, s10, t5
-; RV32IMZBS-NEXT: and a7, a7, t5
-; RV32IMZBS-NEXT: slli t1, t1, 2
-; RV32IMZBS-NEXT: or a7, a7, t1
-; RV32IMZBS-NEXT: lw a1, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a1
-; RV32IMZBS-NEXT: lw a1, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, s0, a1
-; RV32IMZBS-NEXT: srli t2, s5, 2
-; RV32IMZBS-NEXT: and t3, s5, t5
-; RV32IMZBS-NEXT: and t2, t2, t5
+; RV32IMZBS-NEXT: addi s8, a2, 819
+; RV32IMZBS-NEXT: lw a2, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a6, a2, 2
+; RV32IMZBS-NEXT: and t3, a2, s8
+; RV32IMZBS-NEXT: and a6, a6, s8
; RV32IMZBS-NEXT: slli t3, t3, 2
-; RV32IMZBS-NEXT: or t2, t2, t3
-; RV32IMZBS-NEXT: srli t3, s11, 2
-; RV32IMZBS-NEXT: and t4, s11, t5
-; RV32IMZBS-NEXT: and t3, t3, t5
-; RV32IMZBS-NEXT: slli t4, t4, 2
-; RV32IMZBS-NEXT: or t3, t3, t4
-; RV32IMZBS-NEXT: lw a1, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, a1
-; RV32IMZBS-NEXT: lw a1, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, a1
-; RV32IMZBS-NEXT: lw a1, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a3, a1
-; RV32IMZBS-NEXT: sw a1, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: or a6, a6, t3
+; RV32IMZBS-NEXT: lw a2, 696(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t3, a2, 2
+; RV32IMZBS-NEXT: and s1, a2, s8
+; RV32IMZBS-NEXT: and t3, t3, s8
+; RV32IMZBS-NEXT: slli s1, s1, 2
+; RV32IMZBS-NEXT: or t3, t3, s1
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t1, a5
-; RV32IMZBS-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, a6, t0
-; RV32IMZBS-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t4, 349525
-; RV32IMZBS-NEXT: addi t4, t4, 1365
-; RV32IMZBS-NEXT: srli a0, a4, 1
-; RV32IMZBS-NEXT: and a4, a4, t4
-; RV32IMZBS-NEXT: and a0, a0, t4
-; RV32IMZBS-NEXT: slli a4, a4, 1
-; RV32IMZBS-NEXT: or a0, a0, a4
-; RV32IMZBS-NEXT: srli a3, a7, 1
-; RV32IMZBS-NEXT: and a4, a7, t4
-; RV32IMZBS-NEXT: and a3, a3, t4
-; RV32IMZBS-NEXT: slli a4, a4, 1
-; RV32IMZBS-NEXT: or a4, a3, a4
-; RV32IMZBS-NEXT: srli a1, t2, 1
-; RV32IMZBS-NEXT: and a6, t2, t4
-; RV32IMZBS-NEXT: and a1, a1, t4
+; RV32IMZBS-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, t1, a1
+; RV32IMZBS-NEXT: xor t1, t6, s4
+; RV32IMZBS-NEXT: lw a2, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, s7, a2
+; RV32IMZBS-NEXT: lw a2, 700(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli s1, a2, 2
+; RV32IMZBS-NEXT: and s4, a2, s8
+; RV32IMZBS-NEXT: and s1, s1, s8
+; RV32IMZBS-NEXT: slli s4, s4, 2
+; RV32IMZBS-NEXT: or s1, s1, s4
+; RV32IMZBS-NEXT: lw a2, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli s4, a2, 2
+; RV32IMZBS-NEXT: and s6, a2, s8
+; RV32IMZBS-NEXT: and s4, s4, s8
+; RV32IMZBS-NEXT: slli s6, s6, 2
+; RV32IMZBS-NEXT: or s4, s4, s6
+; RV32IMZBS-NEXT: xor s2, s5, s2
+; RV32IMZBS-NEXT: lw a2, 160(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, a2
+; RV32IMZBS-NEXT: xor t4, s0, t4
+; RV32IMZBS-NEXT: lw a2, 156(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, a2
+; RV32IMZBS-NEXT: xor a7, t2, a7
+; RV32IMZBS-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, a2
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a5, 164(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a3, a5
+; RV32IMZBS-NEXT: lui a2, 349525
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: addi s5, a2, 1365
+; RV32IMZBS-NEXT: xor a1, t1, t6
+; RV32IMZBS-NEXT: srli t1, a6, 1
+; RV32IMZBS-NEXT: and a6, a6, s5
+; RV32IMZBS-NEXT: xor t2, s2, s3
+; RV32IMZBS-NEXT: srli t6, t3, 1
+; RV32IMZBS-NEXT: and t3, t3, s5
+; RV32IMZBS-NEXT: xor t4, t4, t5
+; RV32IMZBS-NEXT: srli t5, s1, 1
+; RV32IMZBS-NEXT: and s1, s1, s5
+; RV32IMZBS-NEXT: xor a7, a7, t0
+; RV32IMZBS-NEXT: srli t0, s4, 1
+; RV32IMZBS-NEXT: and s0, s4, s5
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: and a5, t1, s5
; RV32IMZBS-NEXT: slli a6, a6, 1
-; RV32IMZBS-NEXT: or a6, a1, a6
-; RV32IMZBS-NEXT: srli a1, t3, 1
-; RV32IMZBS-NEXT: and a7, t3, t4
-; RV32IMZBS-NEXT: and a1, a1, t4
-; RV32IMZBS-NEXT: slli a7, a7, 1
-; RV32IMZBS-NEXT: or s11, a1, a7
-; RV32IMZBS-NEXT: andi a1, a4, 2
-; RV32IMZBS-NEXT: sw a1, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a7, a4, 1
-; RV32IMZBS-NEXT: andi a5, a4, 4
-; RV32IMZBS-NEXT: andi t0, a4, 8
-; RV32IMZBS-NEXT: andi t2, a4, 16
-; RV32IMZBS-NEXT: andi t3, a4, 32
-; RV32IMZBS-NEXT: andi t1, a4, 64
-; RV32IMZBS-NEXT: andi a1, a4, 128
-; RV32IMZBS-NEXT: sw a1, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t6, a4, 256
-; RV32IMZBS-NEXT: andi s0, a4, 512
-; RV32IMZBS-NEXT: andi s2, a4, 1024
-; RV32IMZBS-NEXT: lw a1, 728(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and s8, a4, a1
-; RV32IMZBS-NEXT: lui s1, 1
-; RV32IMZBS-NEXT: and s9, a4, s1
-; RV32IMZBS-NEXT: lui s1, 2
-; RV32IMZBS-NEXT: and s1, a4, s1
-; RV32IMZBS-NEXT: lui s3, 4
-; RV32IMZBS-NEXT: and s3, a4, s3
-; RV32IMZBS-NEXT: lui s5, 8
-; RV32IMZBS-NEXT: and a2, a4, s5
-; RV32IMZBS-NEXT: sw a2, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s5, 16
-; RV32IMZBS-NEXT: and s5, a4, s5
-; RV32IMZBS-NEXT: lui s6, 32
-; RV32IMZBS-NEXT: and s6, a4, s6
-; RV32IMZBS-NEXT: lui s7, 64
-; RV32IMZBS-NEXT: and s7, a4, s7
-; RV32IMZBS-NEXT: lui s10, 128
-; RV32IMZBS-NEXT: and s10, a4, s10
-; RV32IMZBS-NEXT: lui ra, 256
-; RV32IMZBS-NEXT: and ra, a4, ra
-; RV32IMZBS-NEXT: lui a2, 512
-; RV32IMZBS-NEXT: and a2, a4, a2
-; RV32IMZBS-NEXT: sw a2, 684(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 1024
-; RV32IMZBS-NEXT: and a2, a4, a2
+; RV32IMZBS-NEXT: and t1, t6, s5
+; RV32IMZBS-NEXT: slli t3, t3, 1
+; RV32IMZBS-NEXT: lw a2, 664(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, a0, a2
+; RV32IMZBS-NEXT: lw a0, 668(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, a1, a0
+; RV32IMZBS-NEXT: and t5, t5, s5
+; RV32IMZBS-NEXT: slli s1, s1, 1
+; RV32IMZBS-NEXT: and s3, t0, s5
+; RV32IMZBS-NEXT: slli s0, s0, 1
+; RV32IMZBS-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, t2, a0
+; RV32IMZBS-NEXT: lw a0, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, a0
+; RV32IMZBS-NEXT: lw a0, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a7, a0
+; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a4, a0
+; RV32IMZBS-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or a0, a5, a6
+; RV32IMZBS-NEXT: or a1, t1, t3
+; RV32IMZBS-NEXT: srli a4, t3, 31
+; RV32IMZBS-NEXT: xor a2, s2, t6
+; RV32IMZBS-NEXT: sw a2, 696(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or a3, t5, s1
+; RV32IMZBS-NEXT: or t1, s3, s0
+; RV32IMZBS-NEXT: srli s0, s0, 31
+; RV32IMZBS-NEXT: xor a2, t4, t2
+; RV32IMZBS-NEXT: sw a2, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, a0, 1
+; RV32IMZBS-NEXT: andi a6, a1, 2
+; RV32IMZBS-NEXT: slli a7, a0, 2
+; RV32IMZBS-NEXT: andi t2, a1, 4
+; RV32IMZBS-NEXT: slli t3, a0, 3
+; RV32IMZBS-NEXT: andi t4, a1, 8
+; RV32IMZBS-NEXT: slli t5, a0, 4
+; RV32IMZBS-NEXT: andi t6, a1, 16
+; RV32IMZBS-NEXT: slli s1, a0, 5
+; RV32IMZBS-NEXT: andi s2, a1, 32
+; RV32IMZBS-NEXT: slli s3, a0, 31
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a2, a4, s3
+; RV32IMZBS-NEXT: sw a2, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a3, 31
+; RV32IMZBS-NEXT: seqz s0, s0
+; RV32IMZBS-NEXT: addi s0, s0, -1
+; RV32IMZBS-NEXT: and a4, s0, a4
+; RV32IMZBS-NEXT: sw a4, 688(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a0, 6
+; RV32IMZBS-NEXT: seqz a6, a6
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and a2, a6, a5
; RV32IMZBS-NEXT: sw a2, 676(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 2048
-; RV32IMZBS-NEXT: and a2, a4, a2
-; RV32IMZBS-NEXT: sw a2, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 4096
-; RV32IMZBS-NEXT: and a2, a4, a2
+; RV32IMZBS-NEXT: andi a5, a1, 64
+; RV32IMZBS-NEXT: seqz a6, t2
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and a2, a6, a7
+; RV32IMZBS-NEXT: sw a2, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, a0, 7
+; RV32IMZBS-NEXT: seqz a7, t4
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: and a2, a7, t3
; RV32IMZBS-NEXT: sw a2, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 8192
-; RV32IMZBS-NEXT: and a2, a4, a2
+; RV32IMZBS-NEXT: andi a7, a1, 128
+; RV32IMZBS-NEXT: seqz t2, t6
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: and a2, t2, t5
+; RV32IMZBS-NEXT: sw a2, 656(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a0, 8
+; RV32IMZBS-NEXT: seqz t3, s2
+; RV32IMZBS-NEXT: addi t3, t3, -1
+; RV32IMZBS-NEXT: and a2, t3, s1
+; RV32IMZBS-NEXT: sw a2, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi t3, a1, 256
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a4, a5, a4
+; RV32IMZBS-NEXT: sw a4, 668(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a0, 9
+; RV32IMZBS-NEXT: seqz a5, a7
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a2, a5, a6
+; RV32IMZBS-NEXT: sw a2, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a5, a1, 512
+; RV32IMZBS-NEXT: seqz a6, t3
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and a2, a6, t2
+; RV32IMZBS-NEXT: sw a2, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, a0, 10
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a4, a5, a4
+; RV32IMZBS-NEXT: sw a4, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a1, 1024
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 664(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a5, a1
+; RV32IMZBS-NEXT: bexti a4, a5, 11
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 11
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 12
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 12
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 13
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 13
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 14
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 14
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 15
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 15
+; RV32IMZBS-NEXT: and a2, a4, a6
; RV32IMZBS-NEXT: sw a2, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 16384
-; RV32IMZBS-NEXT: and a2, a4, a2
-; RV32IMZBS-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 32768
-; RV32IMZBS-NEXT: and a2, a4, a2
+; RV32IMZBS-NEXT: bexti a4, a5, 16
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 16
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 592(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 17
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 17
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 584(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 18
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 18
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 604(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 19
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 19
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 612(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 20
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 20
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 21
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 21
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 22
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 22
+; RV32IMZBS-NEXT: and a2, a4, a6
; RV32IMZBS-NEXT: sw a2, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 65536
-; RV32IMZBS-NEXT: and a2, a4, a2
+; RV32IMZBS-NEXT: bexti a4, a5, 23
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 23
+; RV32IMZBS-NEXT: and a2, a4, a6
; RV32IMZBS-NEXT: sw a2, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 131072
-; RV32IMZBS-NEXT: and a2, a4, a2
+; RV32IMZBS-NEXT: bexti a4, a5, 24
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 24
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 576(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 25
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 25
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 26
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 26
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 588(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 27
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 27
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 596(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 28
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 28
+; RV32IMZBS-NEXT: and a2, a4, a6
+; RV32IMZBS-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, a5, 29
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a6, a0, 29
+; RV32IMZBS-NEXT: and a2, a4, a6
; RV32IMZBS-NEXT: sw a2, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 262144
-; RV32IMZBS-NEXT: and a3, a4, a2
-; RV32IMZBS-NEXT: sw a3, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 524288
-; RV32IMZBS-NEXT: and a3, a4, a3
-; RV32IMZBS-NEXT: lw a4, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, a7
-; RV32IMZBS-NEXT: sw a4, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, a5
-; RV32IMZBS-NEXT: sw a4, 628(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, t0
-; RV32IMZBS-NEXT: sw a4, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, t2
-; RV32IMZBS-NEXT: sw a4, 620(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, t3
-; RV32IMZBS-NEXT: sw a4, 616(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, t1
-; RV32IMZBS-NEXT: sw a4, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, t6
-; RV32IMZBS-NEXT: sw a4, 608(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s0
-; RV32IMZBS-NEXT: sw a4, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s2
-; RV32IMZBS-NEXT: sw a4, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s8
-; RV32IMZBS-NEXT: sw a4, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s9
-; RV32IMZBS-NEXT: sw a4, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s1
-; RV32IMZBS-NEXT: sw a4, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s3
-; RV32IMZBS-NEXT: sw a4, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s5
-; RV32IMZBS-NEXT: sw a4, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s6
-; RV32IMZBS-NEXT: sw a4, 680(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s7
-; RV32IMZBS-NEXT: sw a4, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, s10
-; RV32IMZBS-NEXT: sw a4, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, ra
-; RV32IMZBS-NEXT: sw a4, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 676(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 644(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 676(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 684(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a0, a3
-; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t2, s11, a1
-; RV32IMZBS-NEXT: lui a0, 1
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 2
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 4
-; RV32IMZBS-NEXT: and s1, s11, a0
-; RV32IMZBS-NEXT: lui a0, 8
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a1, a1, 1
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a1, a1, a0
+; RV32IMZBS-NEXT: sw a1, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a0, a0, 30
+; RV32IMZBS-NEXT: bexti a1, a5, 30
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a0, a1, a0
; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 32
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 64
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, t1, 2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 1
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 4
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 2
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 128
-; RV32IMZBS-NEXT: and s7, s11, a0
-; RV32IMZBS-NEXT: lui a0, 256
-; RV32IMZBS-NEXT: and s10, s11, a0
-; RV32IMZBS-NEXT: lui a0, 512
-; RV32IMZBS-NEXT: and ra, s11, a0
-; RV32IMZBS-NEXT: lui a0, 1024
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, t1, 8
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 3
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 2048
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, t1, 16
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 4
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 4096
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, t1, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 5
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 6
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 7
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 8192
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 16384
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, t1, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 8
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 32768
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 65536
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 131072
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, s11, a2
-; RV32IMZBS-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and s2, s11, a0
-; RV32IMZBS-NEXT: andi s8, s11, 2
-; RV32IMZBS-NEXT: andi a0, s11, 1
-; RV32IMZBS-NEXT: andi a1, s11, 4
-; RV32IMZBS-NEXT: andi a2, s11, 8
-; RV32IMZBS-NEXT: andi a3, s11, 16
-; RV32IMZBS-NEXT: andi a4, s11, 32
-; RV32IMZBS-NEXT: andi a5, s11, 64
-; RV32IMZBS-NEXT: andi a7, s11, 128
-; RV32IMZBS-NEXT: andi t0, s11, 256
-; RV32IMZBS-NEXT: andi t1, s11, 512
-; RV32IMZBS-NEXT: andi s11, s11, 1024
-; RV32IMZBS-NEXT: mul s9, a6, s8
-; RV32IMZBS-NEXT: mul s6, a6, a0
-; RV32IMZBS-NEXT: mul s8, a6, a1
-; RV32IMZBS-NEXT: mul s3, a6, a2
-; RV32IMZBS-NEXT: mul s5, a6, a3
-; RV32IMZBS-NEXT: mul s0, a6, a4
-; RV32IMZBS-NEXT: mul a0, a6, a5
+; RV32IMZBS-NEXT: andi a0, t1, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 9
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, a3, 10
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, a7
-; RV32IMZBS-NEXT: sw a0, 728(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t6, a6, t0
-; RV32IMZBS-NEXT: mul t3, a6, t1
-; RV32IMZBS-NEXT: mul a0, a6, s11
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, t2
-; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a0, t1
+; RV32IMZBS-NEXT: bexti a1, a0, 11
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 11
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 500(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 12
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli s0, a3, 12
+; RV32IMZBS-NEXT: and a1, a1, s0
+; RV32IMZBS-NEXT: sw a1, 488(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 13
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 13
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 504(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 14
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 14
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 508(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 15
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 15
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 16
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 16
+; RV32IMZBS-NEXT: and s9, a1, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 17
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 17
+; RV32IMZBS-NEXT: and s6, a1, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 18
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 18
+; RV32IMZBS-NEXT: and s10, a1, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 19
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 19
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 484(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 20
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli s1, a3, 20
+; RV32IMZBS-NEXT: and a1, a1, s1
+; RV32IMZBS-NEXT: sw a1, 492(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 21
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 21
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 496(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 22
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, a3, 22
+; RV32IMZBS-NEXT: and s0, a1, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 23
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a5, a3, 23
+; RV32IMZBS-NEXT: and t6, a1, a5
+; RV32IMZBS-NEXT: bexti a5, a0, 24
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, a3, 24
+; RV32IMZBS-NEXT: and s1, a5, a6
+; RV32IMZBS-NEXT: bexti a6, a0, 25
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: slli a7, a3, 25
+; RV32IMZBS-NEXT: and s2, a6, a7
+; RV32IMZBS-NEXT: bexti a7, a0, 26
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: slli t2, a3, 26
+; RV32IMZBS-NEXT: and s3, a7, t2
+; RV32IMZBS-NEXT: bexti t2, a0, 27
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: slli t3, a3, 27
+; RV32IMZBS-NEXT: and s7, t2, t3
+; RV32IMZBS-NEXT: bexti t3, a0, 28
+; RV32IMZBS-NEXT: addi t3, t3, -1
+; RV32IMZBS-NEXT: slli t0, a3, 28
+; RV32IMZBS-NEXT: and s4, t3, t0
+; RV32IMZBS-NEXT: bexti t0, a0, 29
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: slli a2, a3, 29
+; RV32IMZBS-NEXT: and t5, t0, a2
+; RV32IMZBS-NEXT: andi t0, t1, 1
+; RV32IMZBS-NEXT: seqz t0, t0
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: and t0, t0, a3
+; RV32IMZBS-NEXT: slli a3, a3, 30
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: and t4, a0, a3
+; RV32IMZBS-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a1, a0
+; RV32IMZBS-NEXT: lw a0, 672(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a0, a1
+; RV32IMZBS-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a0, a1
+; RV32IMZBS-NEXT: lw a0, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a0, a1
+; RV32IMZBS-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, a0, a1
+; RV32IMZBS-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a0, a3
; RV32IMZBS-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t1, a6, a0
-; RV32IMZBS-NEXT: mul s1, a6, s1
+; RV32IMZBS-NEXT: lw a4, 568(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a0, a4
; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s11, a6, a0
-; RV32IMZBS-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a5, 560(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a0, a5
+; RV32IMZBS-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, a0
; RV32IMZBS-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s7, a6, s7
-; RV32IMZBS-NEXT: mul s10, a6, s10
-; RV32IMZBS-NEXT: mul ra, a6, ra
-; RV32IMZBS-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a6, a0
-; RV32IMZBS-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a7, a6, a0
-; RV32IMZBS-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t2, a6, a0
-; RV32IMZBS-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s2
-; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 632(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 528(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 608(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a3, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a3
-; RV32IMZBS-NEXT: lw a3, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a4
-; RV32IMZBS-NEXT: lw a4, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a5, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: lw a5, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: xor s6, s6, s9
-; RV32IMZBS-NEXT: xor s3, s8, s3
-; RV32IMZBS-NEXT: xor s0, s5, s0
-; RV32IMZBS-NEXT: xor t3, t6, t3
-; RV32IMZBS-NEXT: xor t1, t1, s1
-; RV32IMZBS-NEXT: xor t6, s7, s10
-; RV32IMZBS-NEXT: xor a7, t0, a7
-; RV32IMZBS-NEXT: xor a0, s2, a0
-; RV32IMZBS-NEXT: lw t0, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 520(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw ra, 512(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, ra
+; RV32IMZBS-NEXT: lw ra, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s11, 488(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, ra, s11
+; RV32IMZBS-NEXT: xor s6, s9, s6
+; RV32IMZBS-NEXT: xor t6, s0, t6
+; RV32IMZBS-NEXT: xor t4, t5, t4
+; RV32IMZBS-NEXT: xor t1, t3, t1
+; RV32IMZBS-NEXT: lw t3, 668(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a6, t3
+; RV32IMZBS-NEXT: lw t3, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t3
+; RV32IMZBS-NEXT: lw t3, 628(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, t2, t3
+; RV32IMZBS-NEXT: lw t3, 604(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, t3
+; RV32IMZBS-NEXT: lw t3, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t3
+; RV32IMZBS-NEXT: lw t3, 680(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, t3
+; RV32IMZBS-NEXT: xor a0, t0, a0
+; RV32IMZBS-NEXT: lw t0, 540(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a1, a1, t0
-; RV32IMZBS-NEXT: lw t0, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t0, 524(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a2, a2, t0
-; RV32IMZBS-NEXT: lw t0, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, t0
-; RV32IMZBS-NEXT: lw t0, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, t0
-; RV32IMZBS-NEXT: lw t0, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, t0
-; RV32IMZBS-NEXT: xor t0, s6, s3
-; RV32IMZBS-NEXT: lw a6, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, s0, a6
-; RV32IMZBS-NEXT: lw a6, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t3, a6
-; RV32IMZBS-NEXT: xor t1, t1, s11
-; RV32IMZBS-NEXT: xor t6, t6, ra
-; RV32IMZBS-NEXT: xor a7, a7, t2
+; RV32IMZBS-NEXT: lw t0, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, ra, t0
+; RV32IMZBS-NEXT: xor t3, s6, s10
+; RV32IMZBS-NEXT: xor t5, t6, s1
+; RV32IMZBS-NEXT: lw t6, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, t6
+; RV32IMZBS-NEXT: xor a6, t1, a6
+; RV32IMZBS-NEXT: lw t1, 664(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t1
+; RV32IMZBS-NEXT: lw t1, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t2, t1
+; RV32IMZBS-NEXT: lw t2, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, t2
+; RV32IMZBS-NEXT: lw t2, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t2
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: lw a2, 508(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, t0, a2
+; RV32IMZBS-NEXT: lw t0, 484(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t3, t0
+; RV32IMZBS-NEXT: xor t2, t5, s2
+; RV32IMZBS-NEXT: xor a6, a6, a7
+; RV32IMZBS-NEXT: lw a7, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, t1, a7
+; RV32IMZBS-NEXT: lw t1, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, t1
+; RV32IMZBS-NEXT: lw t1, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t1
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: lw a2, 492(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, t0, a2
+; RV32IMZBS-NEXT: xor t0, t2, s3
+; RV32IMZBS-NEXT: xor a6, a6, a7
+; RV32IMZBS-NEXT: lw a7, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a7
+; RV32IMZBS-NEXT: lw a7, 596(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a7
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: lw a2, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: lw a3, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: lw a4, 644(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a5, a4
-; RV32IMZBS-NEXT: xor a5, t0, s0
-; RV32IMZBS-NEXT: lw a6, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t3, a6
-; RV32IMZBS-NEXT: lw a6, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, a6
-; RV32IMZBS-NEXT: lw a6, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, t6, a6
-; RV32IMZBS-NEXT: lw a6, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, a6
-; RV32IMZBS-NEXT: lw t3, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, t3
-; RV32IMZBS-NEXT: lw t3, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, t3
-; RV32IMZBS-NEXT: lw t3, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t3
-; RV32IMZBS-NEXT: lw t3, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, t3
-; RV32IMZBS-NEXT: lw t3, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, t3
-; RV32IMZBS-NEXT: lw a6, 728(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: lw a6, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, a6
-; RV32IMZBS-NEXT: lw a6, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, a6
-; RV32IMZBS-NEXT: lw a6, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, t2, a6
-; RV32IMZBS-NEXT: lw a6, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, a6
-; RV32IMZBS-NEXT: lw t3, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t3
-; RV32IMZBS-NEXT: lw t3, 676(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, t3
-; RV32IMZBS-NEXT: lw a6, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, a6
-; RV32IMZBS-NEXT: lw a6, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, a6
-; RV32IMZBS-NEXT: xor a1, a0, a1
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a4, a2
-; RV32IMZBS-NEXT: xor a4, a5, t0
-; RV32IMZBS-NEXT: xor a4, a4, t1
-; RV32IMZBS-NEXT: lw a6, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, a6
-; RV32IMZBS-NEXT: xor a1, a1, a3
-; RV32IMZBS-NEXT: lw a3, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a3
-; RV32IMZBS-NEXT: xor a3, a4, t2
-; RV32IMZBS-NEXT: lw a4, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a7, a4
-; RV32IMZBS-NEXT: lw t0, 732(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a6, a1, t0
-; RV32IMZBS-NEXT: srli a7, a1, 8
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: and a2, a3, t0
-; RV32IMZBS-NEXT: xor a4, a3, a4
-; RV32IMZBS-NEXT: srli a3, a3, 8
-; RV32IMZBS-NEXT: and a7, a7, t0
-; RV32IMZBS-NEXT: and a3, a3, t0
+; RV32IMZBS-NEXT: xor a2, t0, s7
+; RV32IMZBS-NEXT: xor a3, a6, a3
+; RV32IMZBS-NEXT: lw a6, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a6
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: xor a1, a2, s4
+; RV32IMZBS-NEXT: xor a3, a3, a4
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: xor a3, a3, a5
+; RV32IMZBS-NEXT: xor a0, a0, t4
+; RV32IMZBS-NEXT: srli a1, a3, 8
+; RV32IMZBS-NEXT: srli a2, a3, 24
+; RV32IMZBS-NEXT: slli a4, a3, 24
+; RV32IMZBS-NEXT: lw a5, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: or a1, a1, a2
+; RV32IMZBS-NEXT: srli a2, a0, 8
+; RV32IMZBS-NEXT: slli a3, a3, 8
+; RV32IMZBS-NEXT: or a3, a4, a3
+; RV32IMZBS-NEXT: srli a4, a0, 24
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: or a2, a2, a4
+; RV32IMZBS-NEXT: and a4, a0, a5
; RV32IMZBS-NEXT: slli a0, a0, 24
-; RV32IMZBS-NEXT: slli a6, a6, 8
-; RV32IMZBS-NEXT: or a0, a0, a6
-; RV32IMZBS-NEXT: srli a1, a1, 24
-; RV32IMZBS-NEXT: or a1, a7, a1
-; RV32IMZBS-NEXT: slli a5, a5, 24
-; RV32IMZBS-NEXT: slli a2, a2, 8
-; RV32IMZBS-NEXT: or a2, a5, a2
-; RV32IMZBS-NEXT: srli a4, a4, 24
-; RV32IMZBS-NEXT: or a3, a3, a4
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: or a2, a2, a3
-; RV32IMZBS-NEXT: srli a1, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, s4
-; RV32IMZBS-NEXT: srli a3, a2, 4
-; RV32IMZBS-NEXT: and a2, a2, s4
-; RV32IMZBS-NEXT: and a1, a1, s4
-; RV32IMZBS-NEXT: and a3, a3, s4
+; RV32IMZBS-NEXT: slli a4, a4, 8
+; RV32IMZBS-NEXT: or a0, a0, a4
+; RV32IMZBS-NEXT: or a1, a3, a1
+; RV32IMZBS-NEXT: or a0, a0, a2
+; RV32IMZBS-NEXT: srli a2, a1, 4
+; RV32IMZBS-NEXT: lw a4, 684(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: srli a3, a0, 4
+; RV32IMZBS-NEXT: and a0, a0, a4
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: slli a1, a1, 4
; RV32IMZBS-NEXT: slli a0, a0, 4
-; RV32IMZBS-NEXT: slli a2, a2, 4
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: or a2, a3, a2
-; RV32IMZBS-NEXT: srli a1, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, t5
-; RV32IMZBS-NEXT: srli a3, a2, 2
-; RV32IMZBS-NEXT: and a2, a2, t5
-; RV32IMZBS-NEXT: and a1, a1, t5
-; RV32IMZBS-NEXT: and a3, a3, t5
+; RV32IMZBS-NEXT: or a1, a2, a1
+; RV32IMZBS-NEXT: or a0, a3, a0
+; RV32IMZBS-NEXT: srli a2, a1, 2
+; RV32IMZBS-NEXT: and a1, a1, s8
+; RV32IMZBS-NEXT: srli a3, a0, 2
+; RV32IMZBS-NEXT: and a0, a0, s8
+; RV32IMZBS-NEXT: and a2, a2, s8
+; RV32IMZBS-NEXT: and a3, a3, s8
+; RV32IMZBS-NEXT: slli a1, a1, 2
+; RV32IMZBS-NEXT: or a1, a2, a1
+; RV32IMZBS-NEXT: lui a2, 349525
+; RV32IMZBS-NEXT: addi a2, a2, 1364
; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: lui a1, 349525
-; RV32IMZBS-NEXT: addi a1, a1, 1364
-; RV32IMZBS-NEXT: slli a2, a2, 2
-; RV32IMZBS-NEXT: or a2, a3, a2
-; RV32IMZBS-NEXT: srli a3, a0, 1
-; RV32IMZBS-NEXT: and a0, a0, t4
-; RV32IMZBS-NEXT: and a4, a2, t4
-; RV32IMZBS-NEXT: srli a2, a2, 1
-; RV32IMZBS-NEXT: and a3, a3, a1
-; RV32IMZBS-NEXT: and a1, a2, a1
-; RV32IMZBS-NEXT: slli a0, a0, 1
; RV32IMZBS-NEXT: or a0, a3, a0
-; RV32IMZBS-NEXT: slli a4, a4, 1
-; RV32IMZBS-NEXT: or a1, a1, a4
+; RV32IMZBS-NEXT: srli a3, a1, 1
+; RV32IMZBS-NEXT: and a1, a1, s5
+; RV32IMZBS-NEXT: and a4, a0, s5
; RV32IMZBS-NEXT: srli a0, a0, 1
-; RV32IMZBS-NEXT: lw s0, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, a0, s0
+; RV32IMZBS-NEXT: and a3, a3, a2
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: slli a1, a1, 1
+; RV32IMZBS-NEXT: or a1, a3, a1
+; RV32IMZBS-NEXT: slli a4, a4, 1
+; RV32IMZBS-NEXT: or a0, a0, a4
; RV32IMZBS-NEXT: srli a1, a1, 1
-; RV32IMZBS-NEXT: lw s1, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, a1, s1
-; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s3, 720(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 696(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, a1, s0
+; RV32IMZBS-NEXT: srli a0, a0, 1
+; RV32IMZBS-NEXT: lw s1, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, a0, s1
+; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s3, 700(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: sw s3, 0(a0)
; RV32IMZBS-NEXT: sw s0, 4(a0)
-; RV32IMZBS-NEXT: lw s2, 724(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 704(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: sw s2, 8(a0)
; RV32IMZBS-NEXT: sw s1, 12(a0)
-; RV32IMZBS-NEXT: addi a0, sp, 736
-; RV32IMZBS-NEXT: sw s3, 736(sp)
-; RV32IMZBS-NEXT: sw s0, 740(sp)
-; RV32IMZBS-NEXT: sw s2, 744(sp)
-; RV32IMZBS-NEXT: sw s1, 748(sp)
+; RV32IMZBS-NEXT: addi a0, sp, 720
+; RV32IMZBS-NEXT: sw s3, 720(sp)
+; RV32IMZBS-NEXT: sw s0, 724(sp)
+; RV32IMZBS-NEXT: sw s2, 728(sp)
+; RV32IMZBS-NEXT: sw s1, 732(sp)
; RV32IMZBS-NEXT: call vector_use
-; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: sw s3, 0(a0)
; RV32IMZBS-NEXT: sw s0, 4(a0)
; RV32IMZBS-NEXT: sw s2, 8(a0)
; RV32IMZBS-NEXT: sw s1, 12(a0)
-; RV32IMZBS-NEXT: lw ra, 812(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s0, 808(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s1, 804(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s2, 800(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s3, 796(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s4, 792(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s5, 788(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 784(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 780(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s8, 776(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s9, 772(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s10, 768(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s11, 764(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: addi sp, sp, 816
+; RV32IMZBS-NEXT: lw ra, 796(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 792(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 788(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 784(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s3, 780(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s4, 776(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s5, 772(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s6, 768(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 764(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 760(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 756(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s10, 752(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s11, 748(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: addi sp, sp, 800
; RV32IMZBS-NEXT: ret
;
; RV64IMZBS-LABEL: mul_use_commutative_clmul_v2i64:
; RV64IMZBS: # %bb.0:
-; RV64IMZBS-NEXT: addi sp, sp, -1120
-; RV64IMZBS-NEXT: sd ra, 1112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s0, 1104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s1, 1096(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s2, 1088(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s3, 1080(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s4, 1072(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s5, 1064(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s6, 1056(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s7, 1048(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s8, 1040(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s9, 1032(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s10, 1024(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s11, 1016(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd a5, 848(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd a4, 840(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi t4, a2, 2
-; RV64IMZBS-NEXT: andi t6, a2, 1
-; RV64IMZBS-NEXT: andi a7, a2, 4
-; RV64IMZBS-NEXT: andi t2, a2, 8
-; RV64IMZBS-NEXT: andi a6, a2, 16
-; RV64IMZBS-NEXT: andi t0, a2, 32
-; RV64IMZBS-NEXT: andi a4, a2, 64
-; RV64IMZBS-NEXT: andi t3, a2, 128
-; RV64IMZBS-NEXT: andi t5, a2, 256
-; RV64IMZBS-NEXT: andi a5, a2, 512
-; RV64IMZBS-NEXT: andi t1, a2, 1024
-; RV64IMZBS-NEXT: bseti s0, zero, 11
-; RV64IMZBS-NEXT: sd s0, 856(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t4, a0, t4
-; RV64IMZBS-NEXT: mul t6, a0, t6
-; RV64IMZBS-NEXT: xor t4, t6, t4
-; RV64IMZBS-NEXT: sd t4, 832(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a7, a0, a7
-; RV64IMZBS-NEXT: mul t2, a0, t2
-; RV64IMZBS-NEXT: xor a7, a7, t2
-; RV64IMZBS-NEXT: sd a7, 816(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a6, a0, a6
-; RV64IMZBS-NEXT: mul a7, a0, t0
-; RV64IMZBS-NEXT: xor a6, a6, a7
-; RV64IMZBS-NEXT: sd a6, 808(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a6, a0, t3
-; RV64IMZBS-NEXT: mul a7, a0, t5
-; RV64IMZBS-NEXT: xor a6, a6, a7
-; RV64IMZBS-NEXT: sd a6, 800(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 792(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, a5
-; RV64IMZBS-NEXT: sd a4, 784(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, t1
-; RV64IMZBS-NEXT: sd a4, 824(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s10, zero, 31
-; RV64IMZBS-NEXT: sd s10, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s11, zero, 32
-; RV64IMZBS-NEXT: sd s11, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti ra, zero, 33
-; RV64IMZBS-NEXT: sd ra, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 34
-; RV64IMZBS-NEXT: sd a4, 1008(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 35
-; RV64IMZBS-NEXT: sd a4, 1000(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 36
-; RV64IMZBS-NEXT: sd a4, 992(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 37
-; RV64IMZBS-NEXT: sd a4, 984(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 38
-; RV64IMZBS-NEXT: sd a4, 976(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 39
-; RV64IMZBS-NEXT: sd a4, 968(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 40
-; RV64IMZBS-NEXT: sd a4, 960(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 41
-; RV64IMZBS-NEXT: sd a4, 952(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 42
-; RV64IMZBS-NEXT: sd a4, 944(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 43
-; RV64IMZBS-NEXT: sd a4, 936(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 44
-; RV64IMZBS-NEXT: sd a4, 928(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 45
+; RV64IMZBS-NEXT: addi sp, sp, -1040
+; RV64IMZBS-NEXT: sd ra, 1032(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s0, 1024(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 1016(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 1008(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s3, 1000(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s4, 992(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s5, 984(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s6, 976(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s7, 968(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s8, 960(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s9, 952(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s10, 944(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s11, 936(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd a5, 928(sp) # 8-byte Folded Spill
; RV64IMZBS-NEXT: sd a4, 920(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 46
-; RV64IMZBS-NEXT: sd a4, 912(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 47
-; RV64IMZBS-NEXT: sd a4, 904(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 48
-; RV64IMZBS-NEXT: sd a4, 896(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 49
-; RV64IMZBS-NEXT: sd a4, 888(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 50
-; RV64IMZBS-NEXT: sd a4, 880(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 51
-; RV64IMZBS-NEXT: sd a4, 872(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s9, zero, 52
-; RV64IMZBS-NEXT: sd s9, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a4, zero, 53
-; RV64IMZBS-NEXT: sd a4, 864(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s0, zero, 54
-; RV64IMZBS-NEXT: bseti a4, zero, 55
-; RV64IMZBS-NEXT: sd a4, 160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s1, zero, 56
-; RV64IMZBS-NEXT: bseti s2, zero, 57
-; RV64IMZBS-NEXT: bseti s3, zero, 58
-; RV64IMZBS-NEXT: bseti s4, zero, 59
-; RV64IMZBS-NEXT: bseti s5, zero, 60
-; RV64IMZBS-NEXT: bseti s6, zero, 61
-; RV64IMZBS-NEXT: bseti s7, zero, 62
-; RV64IMZBS-NEXT: bseti s8, zero, 63
-; RV64IMZBS-NEXT: ld a4, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 776(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 1
-; RV64IMZBS-NEXT: and a5, a2, a5
-; RV64IMZBS-NEXT: lui a6, 2
-; RV64IMZBS-NEXT: and a6, a2, a6
-; RV64IMZBS-NEXT: lui a7, 4
-; RV64IMZBS-NEXT: and a7, a2, a7
-; RV64IMZBS-NEXT: lui t0, 8
-; RV64IMZBS-NEXT: and t0, a2, t0
-; RV64IMZBS-NEXT: lui t1, 16
-; RV64IMZBS-NEXT: and t1, a2, t1
-; RV64IMZBS-NEXT: lui t2, 32
-; RV64IMZBS-NEXT: and t2, a2, t2
-; RV64IMZBS-NEXT: lui t3, 64
-; RV64IMZBS-NEXT: and t3, a2, t3
-; RV64IMZBS-NEXT: lui t4, 128
-; RV64IMZBS-NEXT: and t4, a2, t4
-; RV64IMZBS-NEXT: lui t5, 256
-; RV64IMZBS-NEXT: and t5, a2, t5
-; RV64IMZBS-NEXT: lui t6, 512
-; RV64IMZBS-NEXT: and t6, a2, t6
-; RV64IMZBS-NEXT: lui a4, 1024
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 768(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 2048
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 760(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 4096
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 752(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 8192
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 744(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 16384
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 736(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 32768
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 728(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 65536
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 720(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 131072
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 712(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 262144
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s10
-; RV64IMZBS-NEXT: sd a4, 688(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s10, a2, s11
-; RV64IMZBS-NEXT: and s11, a2, ra
-; RV64IMZBS-NEXT: ld ra, 1008(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and ra, a2, ra
-; RV64IMZBS-NEXT: ld a4, 1000(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 680(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 992(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 664(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 984(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 648(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 976(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 616(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 968(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 600(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 960(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 584(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 952(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 544(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 936(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 488(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 456(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 912(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 904(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 896(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 880(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 872(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s9
-; RV64IMZBS-NEXT: sd a4, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 864(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, a4
-; RV64IMZBS-NEXT: sd a4, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s0
-; RV64IMZBS-NEXT: sd a4, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s9, s0
-; RV64IMZBS-NEXT: ld s0, 160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a2, s0
-; RV64IMZBS-NEXT: sd a4, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s1
-; RV64IMZBS-NEXT: sd a4, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s2
-; RV64IMZBS-NEXT: sd a4, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s3
-; RV64IMZBS-NEXT: sd a4, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s4
-; RV64IMZBS-NEXT: sd a4, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s5
-; RV64IMZBS-NEXT: sd a4, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s6
-; RV64IMZBS-NEXT: sd a4, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a4, a2, s7
-; RV64IMZBS-NEXT: sd a4, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a2, s8
-; RV64IMZBS-NEXT: ld a4, 776(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 512(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, a5
-; RV64IMZBS-NEXT: sd a4, 504(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, a6
-; RV64IMZBS-NEXT: sd a4, 576(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, a7
-; RV64IMZBS-NEXT: sd a4, 640(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t0, a0, t0
-; RV64IMZBS-NEXT: mul a4, a0, t1
-; RV64IMZBS-NEXT: sd a4, 496(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, t2
-; RV64IMZBS-NEXT: sd a4, 560(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, t3
-; RV64IMZBS-NEXT: sd a4, 624(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, t4
-; RV64IMZBS-NEXT: sd a4, 704(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t5, a0, t5
-; RV64IMZBS-NEXT: mul t6, a0, t6
-; RV64IMZBS-NEXT: ld a4, 768(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 552(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 760(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 752(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 672(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 744(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 744(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 736(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 472(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 728(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 464(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 720(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 536(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 712(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 592(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 696(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 656(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 688(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 712(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, s10
-; RV64IMZBS-NEXT: sd a4, 760(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, s11
-; RV64IMZBS-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a4, a0, ra
-; RV64IMZBS-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 680(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 520(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 664(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 568(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 648(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 616(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 680(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 600(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 728(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 584(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 776(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 544(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 424(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 528(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 488(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 480(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 456(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 544(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 432(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 600(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 408(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 648(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 392(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 736(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 768(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 456(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 584(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 616(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 664(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 688(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 720(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 752(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a4, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a0, a0, a2
-; RV64IMZBS-NEXT: sd a0, 488(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a0, a3, 2
-; RV64IMZBS-NEXT: mul a0, a1, a0
-; RV64IMZBS-NEXT: andi a2, a3, 1
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: xor a0, a2, a0
-; RV64IMZBS-NEXT: sd a0, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a0, a3, 4
-; RV64IMZBS-NEXT: mul a0, a1, a0
-; RV64IMZBS-NEXT: andi a2, a3, 8
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: xor a0, a0, a2
-; RV64IMZBS-NEXT: sd a0, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a0, a3, 16
-; RV64IMZBS-NEXT: mul a0, a1, a0
-; RV64IMZBS-NEXT: andi a2, a3, 32
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: xor a0, a0, a2
-; RV64IMZBS-NEXT: sd a0, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a0, a3, 128
-; RV64IMZBS-NEXT: mul a0, a1, a0
-; RV64IMZBS-NEXT: andi a2, a3, 256
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: xor a0, a0, a2
-; RV64IMZBS-NEXT: sd a0, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and t3, a3, a0
-; RV64IMZBS-NEXT: lui a0, 1
+; RV64IMZBS-NEXT: mv t3, a3
+; RV64IMZBS-NEXT: mv t0, a1
+; RV64IMZBS-NEXT: slli t6, a0, 1
+; RV64IMZBS-NEXT: andi s6, a2, 2
+; RV64IMZBS-NEXT: slli s8, a0, 2
+; RV64IMZBS-NEXT: andi s10, a2, 4
+; RV64IMZBS-NEXT: slli s5, a0, 3
+; RV64IMZBS-NEXT: andi s9, a2, 8
+; RV64IMZBS-NEXT: slli s3, a0, 4
+; RV64IMZBS-NEXT: andi s7, a2, 16
+; RV64IMZBS-NEXT: slli s0, a0, 5
+; RV64IMZBS-NEXT: andi s4, a2, 32
+; RV64IMZBS-NEXT: slli a3, a0, 6
+; RV64IMZBS-NEXT: andi s1, a2, 64
+; RV64IMZBS-NEXT: slli t4, a0, 7
+; RV64IMZBS-NEXT: andi s2, a2, 128
+; RV64IMZBS-NEXT: slli a1, a0, 8
+; RV64IMZBS-NEXT: andi t5, a2, 256
+; RV64IMZBS-NEXT: slli a6, a0, 9
+; RV64IMZBS-NEXT: andi t2, a2, 512
+; RV64IMZBS-NEXT: slli a5, a0, 10
+; RV64IMZBS-NEXT: andi a7, a2, 1024
+; RV64IMZBS-NEXT: slli t1, a0, 11
+; RV64IMZBS-NEXT: not a4, a2
+; RV64IMZBS-NEXT: seqz s6, s6
+; RV64IMZBS-NEXT: addi s6, s6, -1
+; RV64IMZBS-NEXT: and t6, s6, t6
+; RV64IMZBS-NEXT: sd t6, 912(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli t6, a0, 12
+; RV64IMZBS-NEXT: seqz s6, s10
+; RV64IMZBS-NEXT: addi s6, s6, -1
+; RV64IMZBS-NEXT: and s6, s6, s8
+; RV64IMZBS-NEXT: sd s6, 904(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli s6, a0, 13
+; RV64IMZBS-NEXT: seqz s8, s9
+; RV64IMZBS-NEXT: addi s8, s8, -1
+; RV64IMZBS-NEXT: and s5, s8, s5
+; RV64IMZBS-NEXT: sd s5, 896(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli s5, a0, 14
+; RV64IMZBS-NEXT: seqz s7, s7
+; RV64IMZBS-NEXT: addi s7, s7, -1
+; RV64IMZBS-NEXT: and s3, s7, s3
+; RV64IMZBS-NEXT: sd s3, 880(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli s3, a0, 15
+; RV64IMZBS-NEXT: seqz s4, s4
+; RV64IMZBS-NEXT: addi s4, s4, -1
+; RV64IMZBS-NEXT: and s4, s4, s0
+; RV64IMZBS-NEXT: slli s0, a0, 16
+; RV64IMZBS-NEXT: seqz s1, s1
+; RV64IMZBS-NEXT: addi s1, s1, -1
+; RV64IMZBS-NEXT: and a3, s1, a3
+; RV64IMZBS-NEXT: sd a3, 888(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a3, a0, 17
+; RV64IMZBS-NEXT: seqz s1, s2
+; RV64IMZBS-NEXT: addi s1, s1, -1
+; RV64IMZBS-NEXT: and s2, s1, t4
+; RV64IMZBS-NEXT: slli t4, a0, 18
+; RV64IMZBS-NEXT: seqz t5, t5
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: and t5, t5, a1
+; RV64IMZBS-NEXT: slli a1, a0, 19
+; RV64IMZBS-NEXT: seqz t2, t2
+; RV64IMZBS-NEXT: addi t2, t2, -1
+; RV64IMZBS-NEXT: and a6, t2, a6
+; RV64IMZBS-NEXT: sd a6, 848(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a6, a0, 20
+; RV64IMZBS-NEXT: seqz a7, a7
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: and a5, a7, a5
+; RV64IMZBS-NEXT: sd a5, 864(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a5, a4, 11
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and t1, a5, t1
+; RV64IMZBS-NEXT: bexti a5, a4, 12
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and t2, a5, t6
+; RV64IMZBS-NEXT: bexti a5, a4, 13
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a5, a5, s6
+; RV64IMZBS-NEXT: sd a5, 816(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a5, a4, 14
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a5, a5, s5
+; RV64IMZBS-NEXT: sd a5, 840(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a5, a4, 15
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a5, a5, s3
+; RV64IMZBS-NEXT: sd a5, 872(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a5, a4, 16
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and s3, a5, s0
+; RV64IMZBS-NEXT: bexti a5, a4, 17
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a3, a5, a3
+; RV64IMZBS-NEXT: sd a3, 768(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 18
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, t4
+; RV64IMZBS-NEXT: sd a3, 792(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 19
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a1, a3, a1
+; RV64IMZBS-NEXT: sd a1, 824(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 20
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a1, a3, a6
+; RV64IMZBS-NEXT: sd a1, 832(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 21
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 21
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 856(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 22
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 22
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 728(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 23
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 23
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 720(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 24
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 24
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 752(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 25
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 25
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 776(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 26
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 26
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 784(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 27
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 27
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 800(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 28
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 28
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 808(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 29
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 29
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 664(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 30
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 30
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 656(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sraiw a3, a2, 31
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 31
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 688(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 32
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 32
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 704(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 33
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 33
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 712(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 34
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 34
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 736(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 35
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 35
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 744(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 36
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 36
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 760(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 37
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 37
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 592(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 38
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 38
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 576(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 39
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 39
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 624(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 40
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 40
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 632(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 41
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 41
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 640(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 42
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 42
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 648(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 43
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 43
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 672(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 44
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 44
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 680(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 45
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 45
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 696(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 46
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 46
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 536(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 47
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 47
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 528(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 48
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 48
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 544(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 49
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 49
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 552(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 50
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 50
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 560(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 51
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 51
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 568(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 52
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 52
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 584(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 53
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 53
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 600(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 54
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 54
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 616(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 55
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 55
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 608(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 56
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 56
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 472(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 57
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 57
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 464(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 58
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 58
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 480(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 59
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 59
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 488(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 60
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 60
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 496(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 61
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a5, a0, 61
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 504(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a3, a4, 62
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, a0, 62
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 512(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a3, a2, 1
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: addi a3, a3, -1
; RV64IMZBS-NEXT: and t4, a3, a0
-; RV64IMZBS-NEXT: lui a0, 2
-; RV64IMZBS-NEXT: and s10, a3, a0
-; RV64IMZBS-NEXT: lui a0, 4
-; RV64IMZBS-NEXT: and s11, a3, a0
-; RV64IMZBS-NEXT: lui a0, 8
-; RV64IMZBS-NEXT: and ra, a3, a0
-; RV64IMZBS-NEXT: lui a0, 16
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 856(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 32
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a4, 64
-; RV64IMZBS-NEXT: and a4, a3, a4
-; RV64IMZBS-NEXT: lui a0, 128
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 256
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 512
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 1024
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a2, 2048
-; RV64IMZBS-NEXT: and a2, a3, a2
-; RV64IMZBS-NEXT: lui a0, 4096
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 8192
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 16384
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 32768
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 65536
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 240(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 131072
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 232(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a0, 262144
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 224(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 1008(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 1008(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 1000(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 216(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 992(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 992(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 984(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 208(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 976(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 976(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 968(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 968(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 960(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 960(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 952(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 944(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 936(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 928(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 920(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 912(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 904(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 176(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 896(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 888(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 880(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 872(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 864(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a0, a3, a0
-; RV64IMZBS-NEXT: sd a0, 112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a0, a3, s9
-; RV64IMZBS-NEXT: sd a0, 104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s0, a3, s0
-; RV64IMZBS-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s1, a3, s1
-; RV64IMZBS-NEXT: and a0, a3, s2
-; RV64IMZBS-NEXT: and s9, a3, s3
-; RV64IMZBS-NEXT: and a5, a3, s4
-; RV64IMZBS-NEXT: and s3, a3, s5
-; RV64IMZBS-NEXT: and s0, a3, s6
-; RV64IMZBS-NEXT: and a7, a3, s7
-; RV64IMZBS-NEXT: and t2, a3, s8
-; RV64IMZBS-NEXT: andi a6, a3, 64
-; RV64IMZBS-NEXT: andi t1, a3, 512
-; RV64IMZBS-NEXT: andi a3, a3, 1024
-; RV64IMZBS-NEXT: mul a6, a1, a6
-; RV64IMZBS-NEXT: sd a6, 88(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a6, a1, t1
-; RV64IMZBS-NEXT: sd a6, 80(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, t3
-; RV64IMZBS-NEXT: sd a3, 72(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, t4
-; RV64IMZBS-NEXT: sd a3, 64(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, s10
-; RV64IMZBS-NEXT: sd a3, 152(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, s11
-; RV64IMZBS-NEXT: sd a3, 880(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, ra
-; RV64IMZBS-NEXT: sd a3, 56(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a3, a1, a4
-; RV64IMZBS-NEXT: sd a3, 864(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 936(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a3, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a1, a3
-; RV64IMZBS-NEXT: sd a3, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a2, a1, a2
+; RV64IMZBS-NEXT: slli a0, a0, 63
+; RV64IMZBS-NEXT: srli a2, a2, 63
+; RV64IMZBS-NEXT: seqz a2, a2
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: and a0, a2, a0
+; RV64IMZBS-NEXT: sd a0, 520(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 2
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 1
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 456(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 4
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 2
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 448(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 8
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 3
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 440(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 16
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 4
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 424(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 32
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 5
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 408(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 64
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 6
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 432(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 128
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 7
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 384(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 256
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 8
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 376(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 512
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 9
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 400(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t3, 1024
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 10
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 416(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: not a0, t3
+; RV64IMZBS-NEXT: bexti a2, a0, 11
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 11
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 336(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 12
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 12
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 13
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 13
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 352(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 14
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 14
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 368(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 15
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 15
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 392(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 16
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 16
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 288(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 17
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 17
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 18
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 18
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 19
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 19
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 320(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 20
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 20
+; RV64IMZBS-NEXT: and a2, a2, a3
; RV64IMZBS-NEXT: sd a2, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 912(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 984(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 240(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
+; RV64IMZBS-NEXT: bexti a2, a0, 21
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 21
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 360(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 22
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 22
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 232(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 23
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 23
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 24
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 24
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 25
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 25
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 26
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 26
+; RV64IMZBS-NEXT: and a2, a2, a3
; RV64IMZBS-NEXT: sd a2, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 232(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
+; RV64IMZBS-NEXT: bexti a2, a0, 27
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 27
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 296(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 28
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 28
+; RV64IMZBS-NEXT: and a2, a2, a3
; RV64IMZBS-NEXT: sd a2, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 224(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 896(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 952(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 1000(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s10, a1, a2
-; RV64IMZBS-NEXT: ld a2, 1008(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s8, a1, a2
-; RV64IMZBS-NEXT: ld a2, 216(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 992(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 208(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 856(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 976(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 904(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 968(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 976(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 960(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 1008(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 200(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s6, a1, a2
-; RV64IMZBS-NEXT: ld a2, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s5, a1, a2
-; RV64IMZBS-NEXT: ld a2, 192(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 184(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 872(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 176(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 928(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 168(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 968(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 992(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s4, a1, a2
-; RV64IMZBS-NEXT: ld a2, 136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s2, a1, a2
-; RV64IMZBS-NEXT: ld a2, 120(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
+; RV64IMZBS-NEXT: bexti a2, a0, 29
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 29
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 168(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 30
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 30
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 152(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sraiw a2, t3, 31
+; RV64IMZBS-NEXT: seqz a2, a2
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 31
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 192(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 32
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 32
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 33
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 33
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 34
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 34
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 224(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 35
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 35
+; RV64IMZBS-NEXT: and a2, a2, a3
; RV64IMZBS-NEXT: sd a2, 240(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 96(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a1, a2
-; RV64IMZBS-NEXT: sd a2, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a2, a1, s1
-; RV64IMZBS-NEXT: sd a2, 888(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a0, a1, a0
-; RV64IMZBS-NEXT: sd a0, 920(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a0, a1, s9
-; RV64IMZBS-NEXT: sd a0, 944(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a0, a1, a5
-; RV64IMZBS-NEXT: sd a0, 960(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul s3, a1, s3
-; RV64IMZBS-NEXT: mul s1, a1, s0
-; RV64IMZBS-NEXT: mul s7, a1, a7
-; RV64IMZBS-NEXT: mul a0, a1, t2
-; RV64IMZBS-NEXT: sd a0, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 832(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 816(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, a0, s0
-; RV64IMZBS-NEXT: ld a0, 808(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 792(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s9, a0, a1
-; RV64IMZBS-NEXT: ld a0, 800(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 784(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: bexti a2, a0, 36
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 36
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 37
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 37
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 112(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 38
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 38
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 88(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 39
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 39
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 120(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 40
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 40
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 128(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 41
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 41
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 42
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 42
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 144(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 43
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 43
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 160(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 44
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 44
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 45
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 45
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 184(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 46
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 46
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 47
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 47
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 48
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 48
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 49
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 49
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 48(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 50
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 50
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 56(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 51
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 51
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 64(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 52
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli s0, t0, 52
+; RV64IMZBS-NEXT: and a2, a2, s0
+; RV64IMZBS-NEXT: sd a2, 72(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 53
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 53
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 96(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 54
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 54
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 104(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 55
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli s1, t0, 55
+; RV64IMZBS-NEXT: and a2, a2, s1
+; RV64IMZBS-NEXT: sd a2, 80(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 56
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 56
+; RV64IMZBS-NEXT: and s7, a2, a3
+; RV64IMZBS-NEXT: bexti a2, a0, 57
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a4, t0, 57
+; RV64IMZBS-NEXT: and s6, a2, a4
+; RV64IMZBS-NEXT: bexti a4, a0, 58
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: slli a5, t0, 58
+; RV64IMZBS-NEXT: and s8, a4, a5
+; RV64IMZBS-NEXT: bexti a5, a0, 59
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: slli a6, t0, 59
+; RV64IMZBS-NEXT: and s9, a5, a6
+; RV64IMZBS-NEXT: bexti a6, a0, 60
+; RV64IMZBS-NEXT: addi a6, a6, -1
+; RV64IMZBS-NEXT: slli a7, t0, 60
+; RV64IMZBS-NEXT: and s10, a6, a7
+; RV64IMZBS-NEXT: bexti a7, a0, 61
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: slli a1, t0, 61
+; RV64IMZBS-NEXT: and s11, a7, a1
+; RV64IMZBS-NEXT: bexti a0, a0, 62
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a1, t0, 62
+; RV64IMZBS-NEXT: and a0, a0, a1
+; RV64IMZBS-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a1, t3, 1
+; RV64IMZBS-NEXT: seqz a1, a1
+; RV64IMZBS-NEXT: addi a1, a1, -1
+; RV64IMZBS-NEXT: and a0, a1, t0
+; RV64IMZBS-NEXT: slli t0, t0, 63
+; RV64IMZBS-NEXT: srli a1, t3, 63
+; RV64IMZBS-NEXT: seqz a1, a1
+; RV64IMZBS-NEXT: addi a1, a1, -1
+; RV64IMZBS-NEXT: and a1, a1, t0
+; RV64IMZBS-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: ld a1, 912(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s5, t4, a1
+; RV64IMZBS-NEXT: ld a1, 904(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 896(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, a1, a2
+; RV64IMZBS-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, a1, s4
+; RV64IMZBS-NEXT: xor t5, s2, t5
+; RV64IMZBS-NEXT: xor t6, t1, t2
+; RV64IMZBS-NEXT: ld a1, 768(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s3, a1
+; RV64IMZBS-NEXT: ld a1, 728(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 720(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, a1, s1
+; RV64IMZBS-NEXT: ld a1, 664(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 656(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, a1, a2
+; RV64IMZBS-NEXT: ld a1, 592(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 576(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s3, a1, a2
+; RV64IMZBS-NEXT: ld a1, 536(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 528(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s4, a1, a2
+; RV64IMZBS-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 464(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, a1, a2
+; RV64IMZBS-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t2, a0, a1
-; RV64IMZBS-NEXT: ld a0, 512(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 504(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, a0, a1
-; RV64IMZBS-NEXT: ld a0, 496(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t0, a0
-; RV64IMZBS-NEXT: xor t5, t5, t6
-; RV64IMZBS-NEXT: ld a0, 472(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 464(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, a0, a1
; RV64IMZBS-NEXT: ld a0, 448(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a5, 440(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a0, a5
-; RV64IMZBS-NEXT: ld a0, 424(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a0, a1
-; RV64IMZBS-NEXT: ld a0, 408(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a0, a1
-; RV64IMZBS-NEXT: ld a0, 392(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, a0, a1
-; RV64IMZBS-NEXT: ld a0, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, a0, a1
-; RV64IMZBS-NEXT: ld a0, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a2, 80(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 408(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a1, a2
-; RV64IMZBS-NEXT: ld a2, 72(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a3, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 384(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a3, 376(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 56(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a4, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a3, 336(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 328(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a3, a4
-; RV64IMZBS-NEXT: ld a4, 40(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld ra, 32(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, ra
+; RV64IMZBS-NEXT: ld a4, 288(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a5, 272(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, a5
+; RV64IMZBS-NEXT: ld a5, 232(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a6, 216(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, a6
+; RV64IMZBS-NEXT: ld a6, 168(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a7, 152(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a6, a7
+; RV64IMZBS-NEXT: ld a7, 112(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t0, 88(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, a7, t0
+; RV64IMZBS-NEXT: ld t0, 32(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 16(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor ra, ra, s11
-; RV64IMZBS-NEXT: xor s8, s10, s8
-; RV64IMZBS-NEXT: xor s5, s6, s5
-; RV64IMZBS-NEXT: xor s2, s4, s2
-; RV64IMZBS-NEXT: xor s1, s3, s1
-; RV64IMZBS-NEXT: xor s0, s0, s9
-; RV64IMZBS-NEXT: ld s3, 824(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, t2, s3
-; RV64IMZBS-NEXT: ld s3, 576(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t3, s3
-; RV64IMZBS-NEXT: ld s3, 560(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t4, s3
-; RV64IMZBS-NEXT: ld s3, 552(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, t5, s3
-; RV64IMZBS-NEXT: ld s3, 536(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, t6, s3
-; RV64IMZBS-NEXT: ld s3, 520(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, s3
-; RV64IMZBS-NEXT: ld s3, 480(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, s3
-; RV64IMZBS-NEXT: ld s3, 456(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, s3
-; RV64IMZBS-NEXT: ld s3, 432(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, s3
-; RV64IMZBS-NEXT: xor t1, t1, a0
-; RV64IMZBS-NEXT: ld a0, 160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, a1, a0
-; RV64IMZBS-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, a0
-; RV64IMZBS-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, a0
-; RV64IMZBS-NEXT: ld a0, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, a0
-; RV64IMZBS-NEXT: ld a0, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s3, ra, a0
-; RV64IMZBS-NEXT: ld a0, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s4, s8, a0
-; RV64IMZBS-NEXT: ld a0, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s5, s5, a0
-; RV64IMZBS-NEXT: ld a0, 240(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s2, s2, a0
-; RV64IMZBS-NEXT: xor s1, s1, s7
-; RV64IMZBS-NEXT: xor t2, s0, t2
-; RV64IMZBS-NEXT: ld a0, 640(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t3, a0
-; RV64IMZBS-NEXT: ld a0, 624(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t4, a0
-; RV64IMZBS-NEXT: ld a0, 608(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, t5, a0
-; RV64IMZBS-NEXT: ld a0, 592(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, t6, a0
-; RV64IMZBS-NEXT: ld a0, 568(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, a0
-; RV64IMZBS-NEXT: ld a0, 544(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, a0
-; RV64IMZBS-NEXT: ld a0, 528(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, a0
-; RV64IMZBS-NEXT: ld a0, 488(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a0, t0, a0
-; RV64IMZBS-NEXT: xor t0, t1, a1
-; RV64IMZBS-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, a1
-; RV64IMZBS-NEXT: ld a1, 864(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, a1
-; RV64IMZBS-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, a1
-; RV64IMZBS-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, s3, a1
-; RV64IMZBS-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s4, a1
-; RV64IMZBS-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s3, s5, a1
-; RV64IMZBS-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s2, s2, a1
-; RV64IMZBS-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, s1, a1
-; RV64IMZBS-NEXT: xor t2, t2, t3
-; RV64IMZBS-NEXT: ld t3, 704(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t4, t3
-; RV64IMZBS-NEXT: ld t4, 672(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, ra
+; RV64IMZBS-NEXT: xor s6, s7, s6
+; RV64IMZBS-NEXT: xor t3, s5, t3
+; RV64IMZBS-NEXT: ld s5, 888(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t4, s5
+; RV64IMZBS-NEXT: ld s5, 848(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, t5, s5
+; RV64IMZBS-NEXT: ld s5, 816(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, t6, s5
+; RV64IMZBS-NEXT: ld s5, 792(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s0, s5
+; RV64IMZBS-NEXT: ld s5, 752(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s1, s5
+; RV64IMZBS-NEXT: ld s5, 688(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, s2, s5
+; RV64IMZBS-NEXT: ld s5, 624(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s3, s3, s5
+; RV64IMZBS-NEXT: ld s5, 544(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s4, s4, s5
+; RV64IMZBS-NEXT: ld s5, 480(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s5
+; RV64IMZBS-NEXT: xor a0, t2, a0
+; RV64IMZBS-NEXT: ld t2, 432(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a1, t2
+; RV64IMZBS-NEXT: ld t2, 400(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a2, t2
+; RV64IMZBS-NEXT: ld t2, 352(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a3, t2
+; RV64IMZBS-NEXT: ld t2, 304(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, t2
+; RV64IMZBS-NEXT: ld t2, 256(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, t2
+; RV64IMZBS-NEXT: ld t2, 192(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a6, t2
+; RV64IMZBS-NEXT: ld t2, 120(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, a7, t2
+; RV64IMZBS-NEXT: ld t2, 40(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, t2
+; RV64IMZBS-NEXT: xor t2, s6, s8
+; RV64IMZBS-NEXT: xor t3, t3, t4
+; RV64IMZBS-NEXT: ld t4, 864(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t4, t5, t4
-; RV64IMZBS-NEXT: ld t5, 656(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t5, 840(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t5, t6, t5
-; RV64IMZBS-NEXT: ld t6, 632(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t6
-; RV64IMZBS-NEXT: ld t6, 600(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t6
-; RV64IMZBS-NEXT: ld t6, 584(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t6
-; RV64IMZBS-NEXT: xor a2, t0, a2
-; RV64IMZBS-NEXT: ld t0, 936(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, t0
-; RV64IMZBS-NEXT: ld t0, 912(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, t0
-; RV64IMZBS-NEXT: ld t0, 896(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: ld t1, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, s0, t1
-; RV64IMZBS-NEXT: ld t6, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, s3, t6
-; RV64IMZBS-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s2, s0
-; RV64IMZBS-NEXT: xor t2, t2, t3
-; RV64IMZBS-NEXT: ld t3, 744(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t4, t3
-; RV64IMZBS-NEXT: ld t4, 712(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t5, t4
-; RV64IMZBS-NEXT: ld t5, 680(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t5
-; RV64IMZBS-NEXT: ld t5, 648(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t5
-; RV64IMZBS-NEXT: ld t5, 616(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t5
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 984(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t6, 824(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, s0, t6
+; RV64IMZBS-NEXT: ld s0, 776(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s1, s0
+; RV64IMZBS-NEXT: ld s1, 704(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s2, s1
+; RV64IMZBS-NEXT: ld s2, 632(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, s3, s2
+; RV64IMZBS-NEXT: ld s3, 552(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s3, s4, s3
+; RV64IMZBS-NEXT: ld s4, 488(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s4
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 368(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 320(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: ld a4, 952(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, t0, a4
-; RV64IMZBS-NEXT: ld t0, 904(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: ld t1, 872(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t6, t1
-; RV64IMZBS-NEXT: ld t5, 336(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 264(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 200(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: ld a6, 128(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: ld a7, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t0, a7
+; RV64IMZBS-NEXT: xor t0, t2, s9
+; RV64IMZBS-NEXT: xor t2, t3, t4
+; RV64IMZBS-NEXT: ld t3, 872(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t5, t3
+; RV64IMZBS-NEXT: ld t4, 832(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t6, t4
+; RV64IMZBS-NEXT: ld t5, 784(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t5, s0, t5
-; RV64IMZBS-NEXT: xor t2, t2, t3
-; RV64IMZBS-NEXT: ld t3, 760(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t4, t3
-; RV64IMZBS-NEXT: ld t4, 728(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t4
-; RV64IMZBS-NEXT: ld t4, 696(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t4
-; RV64IMZBS-NEXT: ld t4, 664(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t4
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 1000(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t6, 712(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, s1, t6
+; RV64IMZBS-NEXT: ld s0, 640(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s2, s0
+; RV64IMZBS-NEXT: ld s1, 560(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s3, s1
+; RV64IMZBS-NEXT: ld s2, 496(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s2
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 344(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 280(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: ld a4, 976(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, t0, a4
-; RV64IMZBS-NEXT: ld t0, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: ld t1, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t5, t1
-; RV64IMZBS-NEXT: xor t2, t2, t3
-; RV64IMZBS-NEXT: ld t3, 776(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t3
-; RV64IMZBS-NEXT: ld t3, 736(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t3
-; RV64IMZBS-NEXT: ld t3, 688(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t3
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 1008(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 208(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 136(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: ld a6, 56(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: xor a7, t0, s10
+; RV64IMZBS-NEXT: xor t0, t2, t3
+; RV64IMZBS-NEXT: ld t2, 856(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t2, t4, t2
+; RV64IMZBS-NEXT: ld t3, 800(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t5, t3
+; RV64IMZBS-NEXT: ld t4, 736(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t6, t4
+; RV64IMZBS-NEXT: ld t5, 648(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, s0, t5
+; RV64IMZBS-NEXT: ld t6, 568(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, s1, t6
+; RV64IMZBS-NEXT: ld s0, 504(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s0
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 296(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 224(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: ld a4, 968(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, t0, a4
-; RV64IMZBS-NEXT: ld t0, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: xor a5, t2, a5
-; RV64IMZBS-NEXT: ld t1, 768(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t1
-; RV64IMZBS-NEXT: ld t1, 720(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t1
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 992(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 144(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: xor a6, a7, s11
+; RV64IMZBS-NEXT: xor a7, t0, t2
+; RV64IMZBS-NEXT: ld t0, 808(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t3, t0
+; RV64IMZBS-NEXT: ld t2, 744(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t2, t4, t2
+; RV64IMZBS-NEXT: ld t3, 672(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t5, t3
+; RV64IMZBS-NEXT: ld t4, 584(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t6, t4
+; RV64IMZBS-NEXT: ld t5, 512(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, t5
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 240(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 160(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, t0, a4
-; RV64IMZBS-NEXT: xor a5, a5, a6
-; RV64IMZBS-NEXT: ld a6, 752(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a7, a6
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 960(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 72(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: xor a6, a7, t0
+; RV64IMZBS-NEXT: ld a7, 760(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t2, a7
+; RV64IMZBS-NEXT: ld t0, 680(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t3, t0
+; RV64IMZBS-NEXT: ld t2, 600(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t2, t4, t2
+; RV64IMZBS-NEXT: ld t3, 520(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, t3
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 176(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 96(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: xor s1, a5, a6
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: xor s1, s1, a0
-; RV64IMZBS-NEXT: xor s2, a2, a1
-; RV64IMZBS-NEXT: ld a0, 840(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 8(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: xor a5, a6, a7
+; RV64IMZBS-NEXT: ld a6, 696(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, t0, a6
+; RV64IMZBS-NEXT: ld a7, 616(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t2, a7
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 104(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: xor a3, a5, a6
+; RV64IMZBS-NEXT: ld a5, 608(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a7, a5
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: xor a3, a3, a5
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: xor s1, a3, t1
+; RV64IMZBS-NEXT: xor s2, a0, a4
+; RV64IMZBS-NEXT: ld a0, 920(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: sd s1, 0(a0)
; RV64IMZBS-NEXT: sd s2, 8(a0)
; RV64IMZBS-NEXT: mv a0, s1
; RV64IMZBS-NEXT: mv a1, s2
; RV64IMZBS-NEXT: call vector_use
-; RV64IMZBS-NEXT: ld a0, 848(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a0, 928(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: sd s1, 0(a0)
; RV64IMZBS-NEXT: sd s2, 8(a0)
-; RV64IMZBS-NEXT: ld ra, 1112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 1104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 1096(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s2, 1088(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s3, 1080(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s4, 1072(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s5, 1064(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s6, 1056(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s7, 1048(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s8, 1040(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s9, 1032(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s10, 1024(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 1016(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: addi sp, sp, 1120
+; RV64IMZBS-NEXT: ld ra, 1032(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 1024(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 1016(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 1008(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 1000(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s4, 992(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s5, 984(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s6, 976(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s7, 968(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s8, 960(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s9, 952(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s10, 944(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s11, 936(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 1040
; RV64IMZBS-NEXT: ret
%xy = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %x, <2 x i64> %y)
%yx = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %y, <2 x i64> %x)
@@ -24161,3 +25711,5 @@ define void @mul_use_commutative_clmul_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-I: {{.*}}
+; CHECK-M: {{.*}}
+; CHECK-ZBS: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/clmulh.ll b/llvm/test/CodeGen/RISCV/clmulh.ll
index 84b876d0605ec..154db64ec555b 100644
--- a/llvm/test/CodeGen/RISCV/clmulh.ll
+++ b/llvm/test/CodeGen/RISCV/clmulh.ll
@@ -7,472 +7,28 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+zbs -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZBS,RV64IMZBS
define i4 @clmulh_i4(i4 %a, i4 %b) nounwind {
-; RV32I-LABEL: clmulh_i4:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: andi s0, a0, 15
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s2, s3, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: srli a0, a0, 4
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: clmulh_i4:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: andi s0, a0, 15
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: srli a0, a0, 4
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: ret
-;
-; CHECK-M-LABEL: clmulh_i4:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: andi a0, a0, 15
-; CHECK-M-NEXT: andi a2, a1, 4
-; CHECK-M-NEXT: andi a3, a1, 2
-; CHECK-M-NEXT: andi a1, a1, 8
-; CHECK-M-NEXT: mul a2, a0, a2
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: xor a2, a3, a2
-; CHECK-M-NEXT: mul a0, a0, a1
-; CHECK-M-NEXT: xor a0, a2, a0
-; CHECK-M-NEXT: srli a0, a0, 4
-; CHECK-M-NEXT: ret
-;
-; CHECK-ZBS-LABEL: clmulh_i4:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: andi a0, a0, 15
-; CHECK-ZBS-NEXT: andi a2, a1, 4
-; CHECK-ZBS-NEXT: andi a3, a1, 2
-; CHECK-ZBS-NEXT: andi a1, a1, 8
-; CHECK-ZBS-NEXT: mul a2, a0, a2
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: xor a2, a3, a2
-; CHECK-ZBS-NEXT: mul a0, a0, a1
-; CHECK-ZBS-NEXT: xor a0, a2, a0
-; CHECK-ZBS-NEXT: srli a0, a0, 4
-; CHECK-ZBS-NEXT: ret
+; CHECK-LABEL: clmulh_i4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a0, a0, 15
+; CHECK-NEXT: andi a2, a1, 4
+; CHECK-NEXT: andi a3, a1, 2
+; CHECK-NEXT: andi a1, a1, 8
+; CHECK-NEXT: slli a4, a0, 2
+; CHECK-NEXT: seqz a2, a2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a2, a2, a4
+; CHECK-NEXT: slli a4, a0, 1
+; CHECK-NEXT: seqz a3, a3
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: xor a2, a3, a2
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: xor a0, a2, a0
+; CHECK-NEXT: srli a0, a0, 4
+; CHECK-NEXT: ret
%a.ext = zext i4 %a to i8
%b.ext = zext i4 %b to i8
%clmul = call i8 @llvm.clmul.i8(i8 %a.ext, i8 %b.ext)
@@ -482,472 +38,28 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind {
}
define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind {
-; RV32I-LABEL: clmulh_i4_bitreverse:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: andi s0, a0, 15
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s2, s3, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: srli a0, a0, 4
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: clmulh_i4_bitreverse:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: andi s0, a0, 15
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: srli a0, a0, 4
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: ret
-;
-; CHECK-M-LABEL: clmulh_i4_bitreverse:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: andi a0, a0, 15
-; CHECK-M-NEXT: andi a2, a1, 4
-; CHECK-M-NEXT: andi a3, a1, 2
-; CHECK-M-NEXT: andi a1, a1, 8
-; CHECK-M-NEXT: mul a2, a0, a2
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: xor a2, a3, a2
-; CHECK-M-NEXT: mul a0, a0, a1
-; CHECK-M-NEXT: xor a0, a2, a0
-; CHECK-M-NEXT: srli a0, a0, 4
-; CHECK-M-NEXT: ret
-;
-; CHECK-ZBS-LABEL: clmulh_i4_bitreverse:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: andi a0, a0, 15
-; CHECK-ZBS-NEXT: andi a2, a1, 4
-; CHECK-ZBS-NEXT: andi a3, a1, 2
-; CHECK-ZBS-NEXT: andi a1, a1, 8
-; CHECK-ZBS-NEXT: mul a2, a0, a2
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: xor a2, a3, a2
-; CHECK-ZBS-NEXT: mul a0, a0, a1
-; CHECK-ZBS-NEXT: xor a0, a2, a0
-; CHECK-ZBS-NEXT: srli a0, a0, 4
-; CHECK-ZBS-NEXT: ret
+; CHECK-LABEL: clmulh_i4_bitreverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a0, a0, 15
+; CHECK-NEXT: andi a2, a1, 4
+; CHECK-NEXT: andi a3, a1, 2
+; CHECK-NEXT: andi a1, a1, 8
+; CHECK-NEXT: slli a4, a0, 2
+; CHECK-NEXT: seqz a2, a2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a2, a2, a4
+; CHECK-NEXT: slli a4, a0, 1
+; CHECK-NEXT: seqz a3, a3
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: xor a2, a3, a2
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: xor a0, a2, a0
+; CHECK-NEXT: srli a0, a0, 4
+; CHECK-NEXT: ret
%a.rev = call i4 @llvm.bitreverse.i4(i4 %a)
%b.rev = call i4 @llvm.bitreverse.i4(i4 %b)
%clmul = call i4 @llvm.clmul.i4(i4 %a.rev, i4 %b.rev)
@@ -958,502 +70,57 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind {
define i8 @clmulh_i8(i8 %a, i8 %b) nounwind {
-; RV32I-LABEL: clmulh_i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: zext.b s0, a0
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s2, s3, a0
-; RV32I-NEXT: andi a1, s1, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: clmulh_i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: zext.b s0, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 16
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 32
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 64
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: andi a1, s1, 128
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: ret
-;
-; CHECK-M-LABEL: clmulh_i8:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: zext.b a0, a0
-; CHECK-M-NEXT: andi a2, a1, 2
-; CHECK-M-NEXT: andi a3, a1, 1
-; CHECK-M-NEXT: andi a4, a1, 4
-; CHECK-M-NEXT: andi a5, a1, 8
-; CHECK-M-NEXT: mul a2, a0, a2
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: xor a2, a3, a2
-; CHECK-M-NEXT: andi a3, a1, 16
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: xor a4, a4, a5
-; CHECK-M-NEXT: andi a5, a1, 32
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: xor a3, a3, a5
-; CHECK-M-NEXT: xor a2, a2, a4
-; CHECK-M-NEXT: andi a4, a1, 64
-; CHECK-M-NEXT: andi a1, a1, 128
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: xor a3, a3, a4
-; CHECK-M-NEXT: xor a2, a2, a3
-; CHECK-M-NEXT: mul a0, a0, a1
-; CHECK-M-NEXT: xor a0, a2, a0
-; CHECK-M-NEXT: srli a0, a0, 8
-; CHECK-M-NEXT: ret
-;
-; CHECK-ZBS-LABEL: clmulh_i8:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: zext.b a0, a0
-; CHECK-ZBS-NEXT: andi a2, a1, 2
-; CHECK-ZBS-NEXT: andi a3, a1, 1
-; CHECK-ZBS-NEXT: andi a4, a1, 4
-; CHECK-ZBS-NEXT: andi a5, a1, 8
-; CHECK-ZBS-NEXT: mul a2, a0, a2
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: xor a2, a3, a2
-; CHECK-ZBS-NEXT: andi a3, a1, 16
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: mul a5, a0, a5
-; CHECK-ZBS-NEXT: xor a4, a4, a5
-; CHECK-ZBS-NEXT: andi a5, a1, 32
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: mul a5, a0, a5
-; CHECK-ZBS-NEXT: xor a3, a3, a5
-; CHECK-ZBS-NEXT: xor a2, a2, a4
-; CHECK-ZBS-NEXT: andi a4, a1, 64
-; CHECK-ZBS-NEXT: andi a1, a1, 128
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: xor a3, a3, a4
-; CHECK-ZBS-NEXT: xor a2, a2, a3
-; CHECK-ZBS-NEXT: mul a0, a0, a1
-; CHECK-ZBS-NEXT: xor a0, a2, a0
-; CHECK-ZBS-NEXT: srli a0, a0, 8
-; CHECK-ZBS-NEXT: ret
+; CHECK-LABEL: clmulh_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: zext.b a0, a0
+; CHECK-NEXT: andi a2, a1, 2
+; CHECK-NEXT: andi a3, a1, 4
+; CHECK-NEXT: andi a4, a1, 8
+; CHECK-NEXT: andi a5, a1, 16
+; CHECK-NEXT: andi a6, a1, 32
+; CHECK-NEXT: andi a7, a1, 64
+; CHECK-NEXT: slli t0, a0, 1
+; CHECK-NEXT: seqz a2, a2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a2, a2, t0
+; CHECK-NEXT: slli t0, a0, 2
+; CHECK-NEXT: seqz a3, a3
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, t0
+; CHECK-NEXT: slli t0, a0, 3
+; CHECK-NEXT: seqz a4, a4
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a4, a4, t0
+; CHECK-NEXT: slli t0, a0, 4
+; CHECK-NEXT: seqz a5, a5
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a5, a5, t0
+; CHECK-NEXT: slli t0, a0, 5
+; CHECK-NEXT: seqz a6, a6
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a6, a6, t0
+; CHECK-NEXT: slli t0, a0, 6
+; CHECK-NEXT: seqz a7, a7
+; CHECK-NEXT: addi a7, a7, -1
+; CHECK-NEXT: and a7, a7, t0
+; CHECK-NEXT: andi t0, a1, 1
+; CHECK-NEXT: seqz t0, t0
+; CHECK-NEXT: addi t0, t0, -1
+; CHECK-NEXT: and t0, t0, a0
+; CHECK-NEXT: xor a2, t0, a2
+; CHECK-NEXT: xor a3, a3, a4
+; CHECK-NEXT: xor a4, a5, a6
+; CHECK-NEXT: andi a1, a1, 128
+; CHECK-NEXT: slli a0, a0, 7
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: xor a3, a4, a7
+; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: xor a0, a2, a0
+; CHECK-NEXT: srli a0, a0, 8
+; CHECK-NEXT: ret
%a.ext = zext i8 %a to i16
%b.ext = zext i8 %b to i16
%clmul = call i16 @llvm.clmul.i16(i16 %a.ext, i16 %b.ext)
@@ -1465,575 +132,424 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind {
define i16 @clmulh_i16(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: clmulh_i16:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s0, a0, 16
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 1
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 2
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 4
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: lui a1, 8
-; RV32I-NEXT: xor s2, s3, a0
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s2, a0
+; RV32I-NEXT: andi a2, a1, 2
+; RV32I-NEXT: andi a3, a1, 4
+; RV32I-NEXT: andi a4, a1, 8
+; RV32I-NEXT: andi a6, a1, 16
+; RV32I-NEXT: andi a7, a1, 32
+; RV32I-NEXT: andi t1, a1, 64
+; RV32I-NEXT: andi t3, a1, 128
+; RV32I-NEXT: andi t4, a1, 256
+; RV32I-NEXT: andi t5, a1, 512
+; RV32I-NEXT: andi t6, a1, 1024
+; RV32I-NEXT: li a5, 1
+; RV32I-NEXT: lui t0, 1
+; RV32I-NEXT: lui t2, 2
+; RV32I-NEXT: lui s0, 4
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: seqz a6, a6
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: seqz t1, t1
+; RV32I-NEXT: seqz t3, t3
+; RV32I-NEXT: seqz t4, t4
+; RV32I-NEXT: seqz t5, t5
+; RV32I-NEXT: seqz t6, t6
+; RV32I-NEXT: slli s1, a0, 1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a2, a2, s1
+; RV32I-NEXT: slli s1, a0, 2
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: and a3, a3, s1
+; RV32I-NEXT: slli s1, a0, 3
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and a4, a4, s1
+; RV32I-NEXT: slli s1, a0, 4
+; RV32I-NEXT: addi a6, a6, -1
+; RV32I-NEXT: and a6, a6, s1
+; RV32I-NEXT: slli s1, a0, 5
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: and a7, a7, s1
+; RV32I-NEXT: slli s1, a0, 6
+; RV32I-NEXT: addi t1, t1, -1
+; RV32I-NEXT: and t1, t1, s1
+; RV32I-NEXT: slli s1, a0, 7
+; RV32I-NEXT: addi t3, t3, -1
+; RV32I-NEXT: and t3, t3, s1
+; RV32I-NEXT: slli s1, a0, 8
+; RV32I-NEXT: addi t4, t4, -1
+; RV32I-NEXT: and t4, t4, s1
+; RV32I-NEXT: slli s1, a0, 9
+; RV32I-NEXT: addi t5, t5, -1
+; RV32I-NEXT: and t5, t5, s1
+; RV32I-NEXT: slli s1, a0, 10
+; RV32I-NEXT: addi t6, t6, -1
+; RV32I-NEXT: and t6, t6, s1
+; RV32I-NEXT: lui s1, 8
+; RV32I-NEXT: slli a5, a5, 11
+; RV32I-NEXT: and t0, a1, t0
+; RV32I-NEXT: and t2, a1, t2
+; RV32I-NEXT: and s0, a1, s0
+; RV32I-NEXT: and s1, a1, s1
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a0
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: xor a2, a6, a7
+; RV32I-NEXT: seqz a4, t0
+; RV32I-NEXT: xor a6, t3, t4
+; RV32I-NEXT: slli a7, a0, 12
+; RV32I-NEXT: seqz t0, t2
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and a4, a4, a7
+; RV32I-NEXT: slli a7, a0, 13
+; RV32I-NEXT: seqz t2, s0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: and a7, t0, a7
+; RV32I-NEXT: slli t0, a0, 14
+; RV32I-NEXT: seqz t3, s1
+; RV32I-NEXT: addi t2, t2, -1
+; RV32I-NEXT: and t0, t2, t0
+; RV32I-NEXT: slli t2, a0, 15
+; RV32I-NEXT: addi t3, t3, -1
+; RV32I-NEXT: and t2, t3, t2
+; RV32I-NEXT: xor a1, a1, a3
+; RV32I-NEXT: xor a2, a2, t1
+; RV32I-NEXT: xor a3, a6, t5
+; RV32I-NEXT: slli a0, a0, 11
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: and a0, a5, a0
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor a2, a3, t6
+; RV32I-NEXT: xor a0, a0, a4
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor a0, a0, a7
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: xor a1, t0, t2
+; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a0, a0, 16
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: clmulh_i16:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 16
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 32
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 64
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 128
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 256
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 512
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 1024
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: li a1, 1
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: slli a1, a1, 11
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s2, a0
+; RV64I-NEXT: andi a2, a1, 2
+; RV64I-NEXT: andi a3, a1, 4
+; RV64I-NEXT: andi a4, a1, 8
+; RV64I-NEXT: andi a6, a1, 16
+; RV64I-NEXT: andi a7, a1, 32
+; RV64I-NEXT: andi t1, a1, 64
+; RV64I-NEXT: andi t3, a1, 128
+; RV64I-NEXT: andi t4, a1, 256
+; RV64I-NEXT: andi t5, a1, 512
+; RV64I-NEXT: andi t6, a1, 1024
+; RV64I-NEXT: li a5, 1
+; RV64I-NEXT: lui t0, 1
+; RV64I-NEXT: lui t2, 2
+; RV64I-NEXT: lui s0, 4
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: seqz a6, a6
+; RV64I-NEXT: seqz a7, a7
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: seqz t3, t3
+; RV64I-NEXT: seqz t4, t4
+; RV64I-NEXT: seqz t5, t5
+; RV64I-NEXT: seqz t6, t6
+; RV64I-NEXT: slli s1, a0, 1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a2, a2, s1
+; RV64I-NEXT: slli s1, a0, 2
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a3, a3, s1
+; RV64I-NEXT: slli s1, a0, 3
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, s1
+; RV64I-NEXT: slli s1, a0, 4
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a6, a6, s1
+; RV64I-NEXT: slli s1, a0, 5
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: and a7, a7, s1
+; RV64I-NEXT: slli s1, a0, 6
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: and t1, t1, s1
+; RV64I-NEXT: slli s1, a0, 7
+; RV64I-NEXT: addi t3, t3, -1
+; RV64I-NEXT: and t3, t3, s1
+; RV64I-NEXT: slli s1, a0, 8
+; RV64I-NEXT: addi t4, t4, -1
+; RV64I-NEXT: and t4, t4, s1
+; RV64I-NEXT: slli s1, a0, 9
+; RV64I-NEXT: addi t5, t5, -1
+; RV64I-NEXT: and t5, t5, s1
+; RV64I-NEXT: slli s1, a0, 10
+; RV64I-NEXT: addi t6, t6, -1
+; RV64I-NEXT: and t6, t6, s1
+; RV64I-NEXT: lui s1, 8
+; RV64I-NEXT: slli a5, a5, 11
+; RV64I-NEXT: and t0, a1, t0
+; RV64I-NEXT: and t2, a1, t2
+; RV64I-NEXT: and s0, a1, s0
+; RV64I-NEXT: and s1, a1, s1
+; RV64I-NEXT: and a5, a1, a5
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: seqz a1, a1
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a1, a1, a0
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: xor a3, a3, a4
+; RV64I-NEXT: xor a2, a6, a7
+; RV64I-NEXT: seqz a4, t0
+; RV64I-NEXT: xor a6, t3, t4
+; RV64I-NEXT: slli a7, a0, 12
+; RV64I-NEXT: seqz t0, t2
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, a7
+; RV64I-NEXT: slli a7, a0, 13
+; RV64I-NEXT: seqz t2, s0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: and a7, t0, a7
+; RV64I-NEXT: slli t0, a0, 14
+; RV64I-NEXT: seqz t3, s1
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and t0, t2, t0
+; RV64I-NEXT: slli t2, a0, 15
+; RV64I-NEXT: addi t3, t3, -1
+; RV64I-NEXT: and t2, t3, t2
+; RV64I-NEXT: xor a1, a1, a3
+; RV64I-NEXT: xor a2, a2, t1
+; RV64I-NEXT: xor a3, a6, t5
+; RV64I-NEXT: slli a0, a0, 11
+; RV64I-NEXT: seqz a5, a5
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: and a0, a5, a0
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: xor a2, a3, t6
+; RV64I-NEXT: xor a0, a0, a4
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: xor a0, a0, a7
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: xor a1, t0, t2
+; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a0, a0, 16
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32IM-LABEL: clmulh_i16:
; RV32IM: # %bb.0:
+; RV32IM-NEXT: addi sp, sp, -16
+; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32IM-NEXT: slli a0, a0, 16
; RV32IM-NEXT: andi a2, a1, 2
-; RV32IM-NEXT: andi a3, a1, 1
-; RV32IM-NEXT: andi a4, a1, 4
-; RV32IM-NEXT: andi a5, a1, 8
-; RV32IM-NEXT: andi a6, a1, 16
+; RV32IM-NEXT: andi a3, a1, 4
+; RV32IM-NEXT: andi a4, a1, 8
+; RV32IM-NEXT: andi a5, a1, 16
; RV32IM-NEXT: andi a7, a1, 32
+; RV32IM-NEXT: andi t0, a1, 64
+; RV32IM-NEXT: andi t1, a1, 128
+; RV32IM-NEXT: andi t2, a1, 256
+; RV32IM-NEXT: andi t3, a1, 512
+; RV32IM-NEXT: andi t4, a1, 1024
+; RV32IM-NEXT: li a6, 1
+; RV32IM-NEXT: lui t5, 1
+; RV32IM-NEXT: lui t6, 2
+; RV32IM-NEXT: lui s0, 4
; RV32IM-NEXT: srli a0, a0, 16
-; RV32IM-NEXT: mul a2, a0, a2
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: andi a3, a1, 64
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: andi a5, a1, 128
-; RV32IM-NEXT: mul a6, a0, a6
-; RV32IM-NEXT: mul a7, a0, a7
-; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: andi a7, a1, 256
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: mul a7, a0, a7
-; RV32IM-NEXT: xor a5, a5, a7
-; RV32IM-NEXT: andi a7, a1, 512
-; RV32IM-NEXT: xor a2, a2, a4
-; RV32IM-NEXT: li a4, 1
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: xor a3, a6, a3
-; RV32IM-NEXT: lui a6, 1
-; RV32IM-NEXT: mul a7, a0, a7
-; RV32IM-NEXT: xor a5, a5, a7
-; RV32IM-NEXT: lui a7, 2
-; RV32IM-NEXT: slli a4, a4, 11
-; RV32IM-NEXT: and a6, a1, a6
-; RV32IM-NEXT: and a4, a1, a4
-; RV32IM-NEXT: mul a6, a0, a6
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: xor a4, a4, a6
-; RV32IM-NEXT: lui a6, 4
-; RV32IM-NEXT: xor a2, a2, a3
-; RV32IM-NEXT: lui a3, 8
-; RV32IM-NEXT: and a7, a1, a7
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: seqz t0, t0
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: seqz t2, t2
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: seqz t4, t4
+; RV32IM-NEXT: slli s1, a0, 1
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: and a2, a2, s1
+; RV32IM-NEXT: slli s1, a0, 2
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and a3, a3, s1
+; RV32IM-NEXT: slli s1, a0, 3
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: and a4, a4, s1
+; RV32IM-NEXT: slli s1, a0, 4
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: and a5, a5, s1
+; RV32IM-NEXT: slli s1, a0, 5
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: and a7, a7, s1
+; RV32IM-NEXT: slli s1, a0, 6
+; RV32IM-NEXT: addi t0, t0, -1
+; RV32IM-NEXT: and t0, t0, s1
+; RV32IM-NEXT: slli s1, a0, 7
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: and t1, t1, s1
+; RV32IM-NEXT: slli s1, a0, 8
+; RV32IM-NEXT: addi t2, t2, -1
+; RV32IM-NEXT: and t2, t2, s1
+; RV32IM-NEXT: slli s1, a0, 9
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: and t3, t3, s1
+; RV32IM-NEXT: slli s1, a0, 10
+; RV32IM-NEXT: addi t4, t4, -1
+; RV32IM-NEXT: and t4, t4, s1
+; RV32IM-NEXT: lui s1, 8
+; RV32IM-NEXT: slli a6, a6, 11
+; RV32IM-NEXT: and t5, a1, t5
+; RV32IM-NEXT: and t6, a1, t6
+; RV32IM-NEXT: and s0, a1, s0
+; RV32IM-NEXT: and s1, a1, s1
; RV32IM-NEXT: and a6, a1, a6
-; RV32IM-NEXT: and a3, a1, a3
-; RV32IM-NEXT: andi a1, a1, 1024
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: xor a1, a5, a1
-; RV32IM-NEXT: mul a5, a0, a7
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: mul a2, a0, a6
-; RV32IM-NEXT: xor a2, a4, a2
+; RV32IM-NEXT: andi a1, a1, 1
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: mul t5, a0, t5
+; RV32IM-NEXT: mul t6, a0, t6
+; RV32IM-NEXT: mul s0, a0, s0
+; RV32IM-NEXT: mul s1, a0, s1
+; RV32IM-NEXT: and a1, a1, a0
+; RV32IM-NEXT: mul a0, a0, a6
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: xor a2, a5, a7
+; RV32IM-NEXT: xor a4, t1, t2
+; RV32IM-NEXT: xor a0, a0, t5
+; RV32IM-NEXT: xor a1, a1, a3
+; RV32IM-NEXT: xor a2, a2, t0
+; RV32IM-NEXT: xor a3, a4, t3
+; RV32IM-NEXT: xor a0, a0, t6
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: mul a0, a0, a3
+; RV32IM-NEXT: xor a2, a3, t4
+; RV32IM-NEXT: xor a0, a0, s0
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: xor a0, a0, s1
; RV32IM-NEXT: xor a0, a1, a0
; RV32IM-NEXT: srli a0, a0, 16
+; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 16
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: clmulh_i16:
; RV64IM: # %bb.0:
+; RV64IM-NEXT: addi sp, sp, -16
+; RV64IM-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 0(sp) # 8-byte Folded Spill
; RV64IM-NEXT: slli a0, a0, 48
; RV64IM-NEXT: andi a2, a1, 2
-; RV64IM-NEXT: andi a3, a1, 1
-; RV64IM-NEXT: andi a4, a1, 4
-; RV64IM-NEXT: andi a5, a1, 8
-; RV64IM-NEXT: andi a6, a1, 16
+; RV64IM-NEXT: andi a3, a1, 4
+; RV64IM-NEXT: andi a4, a1, 8
+; RV64IM-NEXT: andi a5, a1, 16
; RV64IM-NEXT: andi a7, a1, 32
+; RV64IM-NEXT: andi t0, a1, 64
+; RV64IM-NEXT: andi t1, a1, 128
+; RV64IM-NEXT: andi t2, a1, 256
+; RV64IM-NEXT: andi t3, a1, 512
+; RV64IM-NEXT: andi t4, a1, 1024
+; RV64IM-NEXT: li a6, 1
+; RV64IM-NEXT: lui t5, 1
+; RV64IM-NEXT: lui t6, 2
+; RV64IM-NEXT: lui s0, 4
; RV64IM-NEXT: srli a0, a0, 48
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: andi a3, a1, 64
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: xor a4, a4, a5
-; RV64IM-NEXT: andi a5, a1, 128
-; RV64IM-NEXT: mul a6, a0, a6
-; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: xor a6, a6, a7
-; RV64IM-NEXT: andi a7, a1, 256
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: xor a5, a5, a7
-; RV64IM-NEXT: andi a7, a1, 512
-; RV64IM-NEXT: xor a2, a2, a4
-; RV64IM-NEXT: li a4, 1
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: xor a3, a6, a3
-; RV64IM-NEXT: lui a6, 1
-; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: xor a5, a5, a7
-; RV64IM-NEXT: lui a7, 2
-; RV64IM-NEXT: slli a4, a4, 11
-; RV64IM-NEXT: and a6, a1, a6
-; RV64IM-NEXT: and a4, a1, a4
-; RV64IM-NEXT: mul a6, a0, a6
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: xor a4, a4, a6
-; RV64IM-NEXT: lui a6, 4
-; RV64IM-NEXT: xor a2, a2, a3
-; RV64IM-NEXT: lui a3, 8
-; RV64IM-NEXT: and a7, a1, a7
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: seqz a3, a3
+; RV64IM-NEXT: seqz a4, a4
+; RV64IM-NEXT: seqz a5, a5
+; RV64IM-NEXT: seqz a7, a7
+; RV64IM-NEXT: seqz t0, t0
+; RV64IM-NEXT: seqz t1, t1
+; RV64IM-NEXT: seqz t2, t2
+; RV64IM-NEXT: seqz t3, t3
+; RV64IM-NEXT: seqz t4, t4
+; RV64IM-NEXT: slli s1, a0, 1
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: and a2, a2, s1
+; RV64IM-NEXT: slli s1, a0, 2
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and a3, a3, s1
+; RV64IM-NEXT: slli s1, a0, 3
+; RV64IM-NEXT: addi a4, a4, -1
+; RV64IM-NEXT: and a4, a4, s1
+; RV64IM-NEXT: slli s1, a0, 4
+; RV64IM-NEXT: addi a5, a5, -1
+; RV64IM-NEXT: and a5, a5, s1
+; RV64IM-NEXT: slli s1, a0, 5
+; RV64IM-NEXT: addi a7, a7, -1
+; RV64IM-NEXT: and a7, a7, s1
+; RV64IM-NEXT: slli s1, a0, 6
+; RV64IM-NEXT: addi t0, t0, -1
+; RV64IM-NEXT: and t0, t0, s1
+; RV64IM-NEXT: slli s1, a0, 7
+; RV64IM-NEXT: addi t1, t1, -1
+; RV64IM-NEXT: and t1, t1, s1
+; RV64IM-NEXT: slli s1, a0, 8
+; RV64IM-NEXT: addi t2, t2, -1
+; RV64IM-NEXT: and t2, t2, s1
+; RV64IM-NEXT: slli s1, a0, 9
+; RV64IM-NEXT: addi t3, t3, -1
+; RV64IM-NEXT: and t3, t3, s1
+; RV64IM-NEXT: slli s1, a0, 10
+; RV64IM-NEXT: addi t4, t4, -1
+; RV64IM-NEXT: and t4, t4, s1
+; RV64IM-NEXT: lui s1, 8
+; RV64IM-NEXT: slli a6, a6, 11
+; RV64IM-NEXT: and t5, a1, t5
+; RV64IM-NEXT: and t6, a1, t6
+; RV64IM-NEXT: and s0, a1, s0
+; RV64IM-NEXT: and s1, a1, s1
; RV64IM-NEXT: and a6, a1, a6
-; RV64IM-NEXT: and a3, a1, a3
-; RV64IM-NEXT: andi a1, a1, 1024
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: xor a1, a5, a1
-; RV64IM-NEXT: mul a5, a0, a7
-; RV64IM-NEXT: xor a4, a4, a5
-; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: mul a2, a0, a6
-; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: andi a1, a1, 1
+; RV64IM-NEXT: seqz a1, a1
+; RV64IM-NEXT: addi a1, a1, -1
+; RV64IM-NEXT: mul t5, a0, t5
+; RV64IM-NEXT: mul t6, a0, t6
+; RV64IM-NEXT: mul s0, a0, s0
+; RV64IM-NEXT: mul s1, a0, s1
+; RV64IM-NEXT: and a1, a1, a0
+; RV64IM-NEXT: mul a0, a0, a6
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: xor a3, a3, a4
+; RV64IM-NEXT: xor a2, a5, a7
+; RV64IM-NEXT: xor a4, t1, t2
+; RV64IM-NEXT: xor a0, a0, t5
+; RV64IM-NEXT: xor a1, a1, a3
+; RV64IM-NEXT: xor a2, a2, t0
+; RV64IM-NEXT: xor a3, a4, t3
+; RV64IM-NEXT: xor a0, a0, t6
; RV64IM-NEXT: xor a1, a1, a2
-; RV64IM-NEXT: mul a0, a0, a3
+; RV64IM-NEXT: xor a2, a3, t4
+; RV64IM-NEXT: xor a0, a0, s0
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: xor a0, a0, s1
; RV64IM-NEXT: xor a0, a1, a0
; RV64IM-NEXT: srli a0, a0, 16
+; RV64IM-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 16
; RV64IM-NEXT: ret
;
; RV32IMZBS-LABEL: clmulh_i16:
@@ -2041,9 +557,9 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind {
; RV32IMZBS-NEXT: addi sp, sp, -16
; RV32IMZBS-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: slli a0, a0, 16
-; RV32IMZBS-NEXT: andi a2, a1, 2
-; RV32IMZBS-NEXT: andi a3, a1, 1
+; RV32IMZBS-NEXT: andi a3, a1, 2
; RV32IMZBS-NEXT: andi a4, a1, 4
; RV32IMZBS-NEXT: andi a5, a1, 8
; RV32IMZBS-NEXT: andi a6, a1, 16
@@ -2052,63 +568,103 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind {
; RV32IMZBS-NEXT: andi t1, a1, 128
; RV32IMZBS-NEXT: andi t2, a1, 256
; RV32IMZBS-NEXT: andi t3, a1, 512
-; RV32IMZBS-NEXT: bseti t4, zero, 11
-; RV32IMZBS-NEXT: lui t5, 1
-; RV32IMZBS-NEXT: lui t6, 2
-; RV32IMZBS-NEXT: lui s0, 4
-; RV32IMZBS-NEXT: lui s1, 8
-; RV32IMZBS-NEXT: and t4, a1, t4
-; RV32IMZBS-NEXT: and t5, a1, t5
-; RV32IMZBS-NEXT: and t6, a1, t6
-; RV32IMZBS-NEXT: and s0, a1, s0
-; RV32IMZBS-NEXT: and s1, a1, s1
-; RV32IMZBS-NEXT: andi a1, a1, 1024
+; RV32IMZBS-NEXT: andi t4, a1, 1024
+; RV32IMZBS-NEXT: not a2, a1
; RV32IMZBS-NEXT: srli a0, a0, 16
-; RV32IMZBS-NEXT: mul a2, a0, a2
-; RV32IMZBS-NEXT: mul a3, a0, a3
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: mul a6, a0, a6
-; RV32IMZBS-NEXT: mul a7, a0, a7
-; RV32IMZBS-NEXT: mul t0, a0, t0
-; RV32IMZBS-NEXT: mul t1, a0, t1
-; RV32IMZBS-NEXT: mul t2, a0, t2
-; RV32IMZBS-NEXT: mul t3, a0, t3
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: mul t4, a0, t4
-; RV32IMZBS-NEXT: mul t5, a0, t5
-; RV32IMZBS-NEXT: mul t6, a0, t6
-; RV32IMZBS-NEXT: mul s0, a0, s0
-; RV32IMZBS-NEXT: mul a0, a0, s1
-; RV32IMZBS-NEXT: xor a2, a3, a2
+; RV32IMZBS-NEXT: seqz a3, a3
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: seqz a6, a6
+; RV32IMZBS-NEXT: seqz a7, a7
+; RV32IMZBS-NEXT: seqz t0, t0
+; RV32IMZBS-NEXT: seqz t1, t1
+; RV32IMZBS-NEXT: seqz t2, t2
+; RV32IMZBS-NEXT: seqz t3, t3
+; RV32IMZBS-NEXT: seqz t4, t4
+; RV32IMZBS-NEXT: bexti t5, a2, 11
+; RV32IMZBS-NEXT: bexti t6, a2, 12
+; RV32IMZBS-NEXT: bexti s0, a2, 13
+; RV32IMZBS-NEXT: bexti s1, a2, 14
+; RV32IMZBS-NEXT: slli s2, a0, 1
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a3, a3, s2
+; RV32IMZBS-NEXT: slli s2, a0, 2
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a4, a4, s2
+; RV32IMZBS-NEXT: slli s2, a0, 3
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a5, a5, s2
+; RV32IMZBS-NEXT: slli s2, a0, 4
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and a6, a6, s2
+; RV32IMZBS-NEXT: slli s2, a0, 5
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: and a7, a7, s2
+; RV32IMZBS-NEXT: slli s2, a0, 6
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: and t0, t0, s2
+; RV32IMZBS-NEXT: slli s2, a0, 7
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: and t1, t1, s2
+; RV32IMZBS-NEXT: slli s2, a0, 8
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: and t2, t2, s2
+; RV32IMZBS-NEXT: slli s2, a0, 9
+; RV32IMZBS-NEXT: addi t3, t3, -1
+; RV32IMZBS-NEXT: and t3, t3, s2
+; RV32IMZBS-NEXT: slli s2, a0, 10
+; RV32IMZBS-NEXT: addi t4, t4, -1
+; RV32IMZBS-NEXT: and t4, t4, s2
+; RV32IMZBS-NEXT: slli s2, a0, 11
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and t5, t5, s2
+; RV32IMZBS-NEXT: slli s2, a0, 12
+; RV32IMZBS-NEXT: addi t6, t6, -1
+; RV32IMZBS-NEXT: and t6, t6, s2
+; RV32IMZBS-NEXT: slli s2, a0, 13
+; RV32IMZBS-NEXT: addi s0, s0, -1
+; RV32IMZBS-NEXT: and s0, s0, s2
+; RV32IMZBS-NEXT: slli s2, a0, 14
+; RV32IMZBS-NEXT: addi s1, s1, -1
+; RV32IMZBS-NEXT: and s1, s1, s2
+; RV32IMZBS-NEXT: andi a1, a1, 1
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: bexti a2, a2, 15
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a1, a1, a0
+; RV32IMZBS-NEXT: slli a0, a0, 15
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a0, a2, a0
+; RV32IMZBS-NEXT: xor a1, a1, a3
; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: xor a3, a6, a7
-; RV32IMZBS-NEXT: xor a5, t1, t2
-; RV32IMZBS-NEXT: xor a6, t4, t5
-; RV32IMZBS-NEXT: xor a2, a2, a4
-; RV32IMZBS-NEXT: xor a3, a3, t0
-; RV32IMZBS-NEXT: xor a4, a5, t3
-; RV32IMZBS-NEXT: xor a5, a6, t6
-; RV32IMZBS-NEXT: xor a2, a2, a3
-; RV32IMZBS-NEXT: xor a1, a4, a1
+; RV32IMZBS-NEXT: xor a2, a6, a7
+; RV32IMZBS-NEXT: xor a3, t1, t2
+; RV32IMZBS-NEXT: xor a5, t5, t6
+; RV32IMZBS-NEXT: xor a1, a1, a4
+; RV32IMZBS-NEXT: xor a2, a2, t0
+; RV32IMZBS-NEXT: xor a3, a3, t3
; RV32IMZBS-NEXT: xor a5, a5, s0
-; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: xor a2, a3, t4
+; RV32IMZBS-NEXT: xor a5, a5, s1
+; RV32IMZBS-NEXT: xor a1, a1, a2
; RV32IMZBS-NEXT: xor a0, a5, a0
; RV32IMZBS-NEXT: xor a0, a1, a0
; RV32IMZBS-NEXT: srli a0, a0, 16
; RV32IMZBS-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: addi sp, sp, 16
; RV32IMZBS-NEXT: ret
;
; RV64IMZBS-LABEL: clmulh_i16:
; RV64IMZBS: # %bb.0:
-; RV64IMZBS-NEXT: addi sp, sp, -16
-; RV64IMZBS-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s1, 0(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: addi sp, sp, -32
+; RV64IMZBS-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
; RV64IMZBS-NEXT: slli a0, a0, 48
-; RV64IMZBS-NEXT: andi a2, a1, 2
-; RV64IMZBS-NEXT: andi a3, a1, 1
+; RV64IMZBS-NEXT: andi a3, a1, 2
; RV64IMZBS-NEXT: andi a4, a1, 4
; RV64IMZBS-NEXT: andi a5, a1, 8
; RV64IMZBS-NEXT: andi a6, a1, 16
@@ -2117,53 +673,93 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind {
; RV64IMZBS-NEXT: andi t1, a1, 128
; RV64IMZBS-NEXT: andi t2, a1, 256
; RV64IMZBS-NEXT: andi t3, a1, 512
-; RV64IMZBS-NEXT: bseti t4, zero, 11
-; RV64IMZBS-NEXT: lui t5, 1
-; RV64IMZBS-NEXT: lui t6, 2
-; RV64IMZBS-NEXT: lui s0, 4
-; RV64IMZBS-NEXT: lui s1, 8
-; RV64IMZBS-NEXT: and t4, a1, t4
-; RV64IMZBS-NEXT: and t5, a1, t5
-; RV64IMZBS-NEXT: and t6, a1, t6
-; RV64IMZBS-NEXT: and s0, a1, s0
-; RV64IMZBS-NEXT: and s1, a1, s1
-; RV64IMZBS-NEXT: andi a1, a1, 1024
+; RV64IMZBS-NEXT: andi t4, a1, 1024
+; RV64IMZBS-NEXT: not a2, a1
; RV64IMZBS-NEXT: srli a0, a0, 48
-; RV64IMZBS-NEXT: mul a2, a0, a2
-; RV64IMZBS-NEXT: mul a3, a0, a3
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: mul a5, a0, a5
-; RV64IMZBS-NEXT: mul a6, a0, a6
-; RV64IMZBS-NEXT: mul a7, a0, a7
-; RV64IMZBS-NEXT: mul t0, a0, t0
-; RV64IMZBS-NEXT: mul t1, a0, t1
-; RV64IMZBS-NEXT: mul t2, a0, t2
-; RV64IMZBS-NEXT: mul t3, a0, t3
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: mul t4, a0, t4
-; RV64IMZBS-NEXT: mul t5, a0, t5
-; RV64IMZBS-NEXT: mul t6, a0, t6
-; RV64IMZBS-NEXT: mul s0, a0, s0
-; RV64IMZBS-NEXT: mul a0, a0, s1
-; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: seqz a4, a4
+; RV64IMZBS-NEXT: seqz a5, a5
+; RV64IMZBS-NEXT: seqz a6, a6
+; RV64IMZBS-NEXT: seqz a7, a7
+; RV64IMZBS-NEXT: seqz t0, t0
+; RV64IMZBS-NEXT: seqz t1, t1
+; RV64IMZBS-NEXT: seqz t2, t2
+; RV64IMZBS-NEXT: seqz t3, t3
+; RV64IMZBS-NEXT: seqz t4, t4
+; RV64IMZBS-NEXT: bexti t5, a2, 11
+; RV64IMZBS-NEXT: bexti t6, a2, 12
+; RV64IMZBS-NEXT: bexti s0, a2, 13
+; RV64IMZBS-NEXT: bexti s1, a2, 14
+; RV64IMZBS-NEXT: slli s2, a0, 1
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, s2
+; RV64IMZBS-NEXT: slli s2, a0, 2
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: and a4, a4, s2
+; RV64IMZBS-NEXT: slli s2, a0, 3
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a5, a5, s2
+; RV64IMZBS-NEXT: slli s2, a0, 4
+; RV64IMZBS-NEXT: addi a6, a6, -1
+; RV64IMZBS-NEXT: and a6, a6, s2
+; RV64IMZBS-NEXT: slli s2, a0, 5
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: and a7, a7, s2
+; RV64IMZBS-NEXT: slli s2, a0, 6
+; RV64IMZBS-NEXT: addi t0, t0, -1
+; RV64IMZBS-NEXT: and t0, t0, s2
+; RV64IMZBS-NEXT: slli s2, a0, 7
+; RV64IMZBS-NEXT: addi t1, t1, -1
+; RV64IMZBS-NEXT: and t1, t1, s2
+; RV64IMZBS-NEXT: slli s2, a0, 8
+; RV64IMZBS-NEXT: addi t2, t2, -1
+; RV64IMZBS-NEXT: and t2, t2, s2
+; RV64IMZBS-NEXT: slli s2, a0, 9
+; RV64IMZBS-NEXT: addi t3, t3, -1
+; RV64IMZBS-NEXT: and t3, t3, s2
+; RV64IMZBS-NEXT: slli s2, a0, 10
+; RV64IMZBS-NEXT: addi t4, t4, -1
+; RV64IMZBS-NEXT: and t4, t4, s2
+; RV64IMZBS-NEXT: slli s2, a0, 11
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: and t5, t5, s2
+; RV64IMZBS-NEXT: slli s2, a0, 12
+; RV64IMZBS-NEXT: addi t6, t6, -1
+; RV64IMZBS-NEXT: and t6, t6, s2
+; RV64IMZBS-NEXT: slli s2, a0, 13
+; RV64IMZBS-NEXT: addi s0, s0, -1
+; RV64IMZBS-NEXT: and s0, s0, s2
+; RV64IMZBS-NEXT: slli s2, a0, 14
+; RV64IMZBS-NEXT: addi s1, s1, -1
+; RV64IMZBS-NEXT: and s1, s1, s2
+; RV64IMZBS-NEXT: andi a1, a1, 1
+; RV64IMZBS-NEXT: seqz a1, a1
+; RV64IMZBS-NEXT: bexti a2, a2, 15
+; RV64IMZBS-NEXT: addi a1, a1, -1
+; RV64IMZBS-NEXT: and a1, a1, a0
+; RV64IMZBS-NEXT: slli a0, a0, 15
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: and a0, a2, a0
+; RV64IMZBS-NEXT: xor a1, a1, a3
; RV64IMZBS-NEXT: xor a4, a4, a5
-; RV64IMZBS-NEXT: xor a3, a6, a7
-; RV64IMZBS-NEXT: xor a5, t1, t2
-; RV64IMZBS-NEXT: xor a6, t4, t5
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: xor a3, a3, t0
-; RV64IMZBS-NEXT: xor a4, a5, t3
-; RV64IMZBS-NEXT: xor a5, a6, t6
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: xor a1, a4, a1
+; RV64IMZBS-NEXT: xor a2, a6, a7
+; RV64IMZBS-NEXT: xor a3, t1, t2
+; RV64IMZBS-NEXT: xor a5, t5, t6
+; RV64IMZBS-NEXT: xor a1, a1, a4
+; RV64IMZBS-NEXT: xor a2, a2, t0
+; RV64IMZBS-NEXT: xor a3, a3, t3
; RV64IMZBS-NEXT: xor a5, a5, s0
-; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: xor a1, a1, a2
+; RV64IMZBS-NEXT: xor a2, a3, t4
+; RV64IMZBS-NEXT: xor a5, a5, s1
+; RV64IMZBS-NEXT: xor a1, a1, a2
; RV64IMZBS-NEXT: xor a0, a5, a0
; RV64IMZBS-NEXT: xor a0, a1, a0
; RV64IMZBS-NEXT: srli a0, a0, 16
-; RV64IMZBS-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: addi sp, sp, 16
+; RV64IMZBS-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 32
; RV64IMZBS-NEXT: ret
%a.ext = zext i16 %a to i32
%b.ext = zext i16 %b to i32
@@ -2176,1495 +772,1578 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind {
define i32 @clmulh_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: clmulh_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -48
-; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: srli a2, a0, 8
-; RV32I-NEXT: lui s8, 16
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: slli a4, a0, 24
-; RV32I-NEXT: lui a5, 61681
-; RV32I-NEXT: lui a6, 209715
-; RV32I-NEXT: srli a7, a1, 8
-; RV32I-NEXT: addi s2, s8, -256
-; RV32I-NEXT: and a2, a2, s2
-; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: and a7, a7, s2
-; RV32I-NEXT: or a3, a7, a3
-; RV32I-NEXT: lui s6, 349525
-; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: addi sp, sp, -112
+; RV32I-NEXT: sw ra, 108(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 104(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 100(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 96(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 92(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 88(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 84(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 80(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s8, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s9, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s10, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s11, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli s0, a0, 8
+; RV32I-NEXT: lui t5, 16
+; RV32I-NEXT: srli s1, a0, 24
+; RV32I-NEXT: slli s2, a0, 24
+; RV32I-NEXT: lui a4, 61681
+; RV32I-NEXT: lui a5, 209715
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: srli s6, a1, 8
+; RV32I-NEXT: srli s4, a1, 24
+; RV32I-NEXT: slli s5, a1, 24
+; RV32I-NEXT: li a7, 1
+; RV32I-NEXT: lui ra, 1
+; RV32I-NEXT: lui a3, 2
+; RV32I-NEXT: lui t2, 4
+; RV32I-NEXT: lui t0, 8
+; RV32I-NEXT: lui t1, 32
+; RV32I-NEXT: lui a6, 64
+; RV32I-NEXT: lui t6, 128
+; RV32I-NEXT: addi t3, t5, -256
+; RV32I-NEXT: addi t4, a4, -241
+; RV32I-NEXT: addi s3, a5, 819
+; RV32I-NEXT: addi s11, a2, 1365
+; RV32I-NEXT: slli a2, a7, 11
+; RV32I-NEXT: and a4, s0, t3
+; RV32I-NEXT: and a0, a0, t3
+; RV32I-NEXT: and a5, s6, t3
+; RV32I-NEXT: and a1, a1, t3
+; RV32I-NEXT: or a4, a4, s1
; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: slli a4, a1, 24
-; RV32I-NEXT: addi s5, a5, -241
-; RV32I-NEXT: addi s4, a6, 819
-; RV32I-NEXT: and a1, a1, s2
+; RV32I-NEXT: or a5, a5, s4
; RV32I-NEXT: slli a1, a1, 8
-; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: addi s3, s6, 1365
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: srli a2, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: and a2, a2, s5
+; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: or a1, s5, a1
+; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: or a1, a1, a5
+; RV32I-NEXT: srli a4, a0, 4
+; RV32I-NEXT: and a0, a0, t4
+; RV32I-NEXT: srli a5, a1, 4
+; RV32I-NEXT: and a1, a1, t4
+; RV32I-NEXT: and a4, a4, t4
; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: and a3, a3, s5
+; RV32I-NEXT: and a5, a5, t4
; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: and a2, a2, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, s4
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 1
+; RV32I-NEXT: or a0, a4, a0
+; RV32I-NEXT: or a1, a5, a1
+; RV32I-NEXT: srli a4, a0, 2
; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: srli a3, a1, 1
+; RV32I-NEXT: srli a5, a1, 2
; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: and a2, a2, s3
+; RV32I-NEXT: and a4, a4, s3
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: and a5, a5, s3
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: or a0, a4, a0
+; RV32I-NEXT: or a1, a5, a1
+; RV32I-NEXT: srli a4, a0, 1
+; RV32I-NEXT: and a0, a0, s11
+; RV32I-NEXT: srli a5, a1, 1
+; RV32I-NEXT: and s0, a1, s11
+; RV32I-NEXT: and a1, a4, s11
; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, s3
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: or s0, a2, a0
-; RV32I-NEXT: or s7, a3, a1
-; RV32I-NEXT: andi a1, s7, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, a0, s1
-; RV32I-NEXT: andi a1, s7, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: andi a1, s7, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: andi a1, s7, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: and a1, s7, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: and a1, s7, s8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lui a1, 1024
-; RV32I-NEXT: xor s8, s9, a0
-; RV32I-NEXT: and a1, s7, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lui a1, 131072
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: and a1, s7, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: addi a1, s6, 1364
-; RV32I-NEXT: xor a0, s8, a0
-; RV32I-NEXT: srli a2, a0, 8
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: and a2, a2, s2
-; RV32I-NEXT: and a4, a0, s2
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: or a0, a0, a4
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: srli a2, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a2, a2, s5
+; RV32I-NEXT: and a4, a5, s11
+; RV32I-NEXT: slli s0, s0, 1
+; RV32I-NEXT: or s4, a1, a0
+; RV32I-NEXT: or a7, a4, s0
+; RV32I-NEXT: srli s0, s0, 31
+; RV32I-NEXT: slli a4, s4, 1
+; RV32I-NEXT: andi a5, a7, 2
+; RV32I-NEXT: slli s1, s4, 2
+; RV32I-NEXT: andi s2, a7, 4
+; RV32I-NEXT: slli a0, s4, 3
+; RV32I-NEXT: andi s5, a7, 8
+; RV32I-NEXT: slli s6, s4, 4
+; RV32I-NEXT: andi s7, a7, 16
+; RV32I-NEXT: slli s8, s4, 5
+; RV32I-NEXT: andi s9, a7, 32
+; RV32I-NEXT: slli s10, s4, 31
+; RV32I-NEXT: seqz s0, s0
+; RV32I-NEXT: addi s0, s0, -1
+; RV32I-NEXT: and a1, s0, s10
+; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s0, s4, 6
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: and a4, a5, a4
+; RV32I-NEXT: sw a4, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a7, 64
+; RV32I-NEXT: seqz a5, s2
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: and a5, a5, s1
+; RV32I-NEXT: sw a5, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s4, 7
+; RV32I-NEXT: seqz s1, s5
+; RV32I-NEXT: addi s1, s1, -1
+; RV32I-NEXT: and a0, s1, a0
+; RV32I-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi s1, a7, 128
+; RV32I-NEXT: seqz s2, s7
+; RV32I-NEXT: addi s2, s2, -1
+; RV32I-NEXT: and a0, s2, s6
+; RV32I-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s2, s4, 8
+; RV32I-NEXT: seqz s6, s9
+; RV32I-NEXT: addi s6, s6, -1
+; RV32I-NEXT: and a0, s6, s8
+; RV32I-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi s8, a7, 256
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and a4, a4, s0
+; RV32I-NEXT: sw a4, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a4, s4, 9
+; RV32I-NEXT: seqz s0, s1
+; RV32I-NEXT: addi s0, s0, -1
+; RV32I-NEXT: and a5, s0, a5
+; RV32I-NEXT: sw a5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a5, a7, 512
+; RV32I-NEXT: seqz s0, s8
+; RV32I-NEXT: addi s0, s0, -1
+; RV32I-NEXT: and a0, s0, s2
+; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s0, s4, 10
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: and a4, a5, a4
+; RV32I-NEXT: sw a4, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a7, 1024
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and a4, a4, s0
+; RV32I-NEXT: sw a4, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a4, s4, 11
+; RV32I-NEXT: and a5, a7, a2
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: and a4, a5, a4
+; RV32I-NEXT: sw a4, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a4, s4, 12
+; RV32I-NEXT: and a5, a7, ra
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: and ra, a5, a4
+; RV32I-NEXT: slli a4, s4, 13
+; RV32I-NEXT: and a2, a7, a3
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and s7, a2, a4
+; RV32I-NEXT: slli a2, s4, 14
+; RV32I-NEXT: and a4, a7, t2
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and s8, a4, a2
+; RV32I-NEXT: slli a2, s4, 15
+; RV32I-NEXT: and a4, a7, t0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and s6, a4, a2
+; RV32I-NEXT: slli a2, s4, 16
+; RV32I-NEXT: and a3, a7, t5
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: and s2, a3, a2
+; RV32I-NEXT: slli a2, s4, 17
+; RV32I-NEXT: and a3, a7, t1
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: and t5, a3, a2
+; RV32I-NEXT: slli a2, s4, 18
+; RV32I-NEXT: and a3, a7, a6
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: and s0, a3, a2
+; RV32I-NEXT: slli a2, s4, 19
+; RV32I-NEXT: and a3, a7, t6
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: and s1, a3, a2
+; RV32I-NEXT: lui a2, 256
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, s4, 20
+; RV32I-NEXT: and t6, a2, a3
+; RV32I-NEXT: lui a2, 512
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, s4, 21
+; RV32I-NEXT: and s5, a2, a3
+; RV32I-NEXT: lui a2, 1024
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, s4, 22
+; RV32I-NEXT: and t2, a2, a3
+; RV32I-NEXT: lui a3, 2048
+; RV32I-NEXT: and a3, a7, a3
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a4, s4, 23
+; RV32I-NEXT: and t0, a3, a4
+; RV32I-NEXT: lui a4, 4096
+; RV32I-NEXT: and a4, a7, a4
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, s4, 24
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: lui a5, 8192
+; RV32I-NEXT: and a5, a7, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, s4, 25
+; RV32I-NEXT: and t1, a5, a6
+; RV32I-NEXT: lui a5, 16384
+; RV32I-NEXT: and a5, a7, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, s4, 26
+; RV32I-NEXT: and a3, a5, a6
+; RV32I-NEXT: lui a5, 32768
+; RV32I-NEXT: and a5, a7, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, s4, 27
+; RV32I-NEXT: and a6, a5, a6
+; RV32I-NEXT: lui a5, 65536
+; RV32I-NEXT: and a5, a7, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a0, s4, 28
+; RV32I-NEXT: and a5, a5, a0
+; RV32I-NEXT: lui a0, 131072
+; RV32I-NEXT: and a0, a7, a0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, s4, 29
+; RV32I-NEXT: and a2, a0, a1
+; RV32I-NEXT: lui a1, 262144
+; RV32I-NEXT: and a1, a7, a1
+; RV32I-NEXT: andi a7, a7, 1
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: and a7, a7, s4
+; RV32I-NEXT: slli s4, s4, 30
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, a0
+; RV32I-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, a0, s4
+; RV32I-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, s10
+; RV32I-NEXT: lw s10, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, s10, s9
+; RV32I-NEXT: lw s9, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, s9, ra
+; RV32I-NEXT: xor t5, s2, t5
+; RV32I-NEXT: xor t0, t2, t0
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: xor a2, a7, s4
+; RV32I-NEXT: lw a7, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a7
+; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, s10, a7
+; RV32I-NEXT: xor t2, s9, s7
+; RV32I-NEXT: xor t5, t5, s0
+; RV32I-NEXT: xor a4, t0, a4
+; RV32I-NEXT: lw t0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, t0
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: lw a2, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a7, a2
+; RV32I-NEXT: xor a7, t2, s8
+; RV32I-NEXT: xor t0, t5, s1
+; RV32I-NEXT: xor a4, a4, t1
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: xor a2, a7, s6
+; RV32I-NEXT: xor a7, t0, t6
+; RV32I-NEXT: xor a3, a4, a3
+; RV32I-NEXT: lui a4, 349525
+; RV32I-NEXT: addi a4, a4, 1364
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: xor a2, a7, s5
+; RV32I-NEXT: xor a3, a3, a6
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: xor a3, a3, a5
+; RV32I-NEXT: xor a0, a0, a3
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: srli a2, a0, 24
+; RV32I-NEXT: slli a3, a0, 24
+; RV32I-NEXT: and a0, a0, t3
+; RV32I-NEXT: and a1, a1, t3
+; RV32I-NEXT: slli a0, a0, 8
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: and a0, a0, t4
+; RV32I-NEXT: and a1, a1, t4
; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: srli a2, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a2, a2, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: srli a2, a0, 1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: and a1, a1, s3
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a0, a0, s11
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: slli a0, a0, 1
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: lw ra, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 104(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 100(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 96(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 92(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 88(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 84(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 80(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 112
; RV32I-NEXT: ret
;
; RV64I-LABEL: clmulh_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi sp, sp, -160
+; RV64I-NEXT: sd ra, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli s0, a0, 32
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 16
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 32
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 64
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 128
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 256
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 512
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 1024
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: li a1, 1
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: slli a1, a1, 11
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 32
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 64
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 128
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 256
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 512
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 1024
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2048
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 4096
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 8192
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 16384
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 32768
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 65536
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 131072
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 262144
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: srliw a1, s1, 31
-; RV64I-NEXT: slli a1, a1, 31
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s2, a0
+; RV64I-NEXT: andi a2, a1, 2
+; RV64I-NEXT: andi a3, a1, 1
+; RV64I-NEXT: andi a4, a1, 4
+; RV64I-NEXT: andi a5, a1, 8
+; RV64I-NEXT: andi a6, a1, 16
+; RV64I-NEXT: andi a7, a1, 32
+; RV64I-NEXT: andi t0, a1, 64
+; RV64I-NEXT: andi t1, a1, 128
+; RV64I-NEXT: andi t3, a1, 256
+; RV64I-NEXT: andi t4, a1, 512
+; RV64I-NEXT: andi t5, a1, 1024
+; RV64I-NEXT: lui s0, 16
+; RV64I-NEXT: lui s2, 32
+; RV64I-NEXT: lui s8, 64
+; RV64I-NEXT: lui s5, 128
+; RV64I-NEXT: lui s1, 256
+; RV64I-NEXT: lui t6, 512
+; RV64I-NEXT: lui s3, 1024
+; RV64I-NEXT: lui t2, 2048
+; RV64I-NEXT: lui s11, 4096
+; RV64I-NEXT: srli s4, a0, 31
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and s4, a2, s4
+; RV64I-NEXT: srli a2, a0, 32
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and s6, a3, a2
+; RV64I-NEXT: srli a2, a0, 30
+; RV64I-NEXT: seqz a3, a4
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and s7, a3, a2
+; RV64I-NEXT: srli a2, a0, 29
+; RV64I-NEXT: seqz a3, a5
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and s9, a3, a2
+; RV64I-NEXT: srli a2, a0, 28
+; RV64I-NEXT: seqz a3, a6
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and s10, a3, a2
+; RV64I-NEXT: srli a2, a0, 27
+; RV64I-NEXT: seqz a3, a7
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and ra, a3, a2
+; RV64I-NEXT: srli a2, a0, 26
+; RV64I-NEXT: seqz a3, t0
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srli a2, a0, 25
+; RV64I-NEXT: seqz a3, t1
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and t1, a3, a2
+; RV64I-NEXT: srli a2, a0, 24
+; RV64I-NEXT: seqz a3, t3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a7, a3, a2
+; RV64I-NEXT: srli a2, a0, 23
+; RV64I-NEXT: seqz a3, t4
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srli a2, a0, 22
+; RV64I-NEXT: seqz a3, t5
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui t0, 8192
+; RV64I-NEXT: li a2, 1
+; RV64I-NEXT: slli a2, a2, 11
+; RV64I-NEXT: lui a3, 1
+; RV64I-NEXT: and a3, a1, a3
+; RV64I-NEXT: lui a4, 2
+; RV64I-NEXT: and a4, a1, a4
+; RV64I-NEXT: lui a5, 4
+; RV64I-NEXT: and a5, a1, a5
+; RV64I-NEXT: lui a6, 8
+; RV64I-NEXT: and a6, a1, a6
+; RV64I-NEXT: and s0, a1, s0
+; RV64I-NEXT: and s2, a1, s2
+; RV64I-NEXT: and s8, a1, s8
+; RV64I-NEXT: and s5, a1, s5
+; RV64I-NEXT: and s1, a1, s1
+; RV64I-NEXT: and t6, a1, t6
+; RV64I-NEXT: and t4, a1, s3
+; RV64I-NEXT: and s3, a1, t2
+; RV64I-NEXT: and t2, a1, s11
+; RV64I-NEXT: sd t2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and t0, a1, t0
+; RV64I-NEXT: sd t0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui t0, 16384
+; RV64I-NEXT: and t0, a1, t0
+; RV64I-NEXT: sd t0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui t0, 32768
+; RV64I-NEXT: and s11, a1, t0
+; RV64I-NEXT: lui t0, 65536
+; RV64I-NEXT: and t3, a1, t0
+; RV64I-NEXT: lui t0, 131072
+; RV64I-NEXT: and t5, a1, t0
+; RV64I-NEXT: lui t0, 262144
+; RV64I-NEXT: and t0, a1, t0
+; RV64I-NEXT: and t2, a1, a2
+; RV64I-NEXT: sraiw a1, a1, 31
+; RV64I-NEXT: seqz a1, a1
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: srli a2, a0, 1
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor s4, s6, s4
+; RV64I-NEXT: xor s6, s7, s9
+; RV64I-NEXT: xor s7, s10, ra
+; RV64I-NEXT: xor s9, t1, a7
+; RV64I-NEXT: seqz a1, a3
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: srli a2, a0, 20
+; RV64I-NEXT: and a7, a1, a2
+; RV64I-NEXT: seqz a1, a4
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: srli a2, a0, 19
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: seqz a2, a5
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: srli a3, a0, 18
+; RV64I-NEXT: and s10, a2, a3
+; RV64I-NEXT: seqz a2, a6
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: srli a3, a0, 17
+; RV64I-NEXT: and ra, a2, a3
+; RV64I-NEXT: seqz a2, s0
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: and a3, a2, a3
+; RV64I-NEXT: seqz a2, s2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: srli a4, a0, 15
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: seqz a4, s8
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: srli s0, a0, 14
+; RV64I-NEXT: and s0, a4, s0
+; RV64I-NEXT: seqz a4, s5
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: srli a5, a0, 13
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: seqz a5, s1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: srli a6, a0, 12
+; RV64I-NEXT: and a5, a5, a6
+; RV64I-NEXT: seqz a6, t6
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: srli t1, a0, 11
+; RV64I-NEXT: and a6, a6, t1
+; RV64I-NEXT: seqz t1, t4
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: srli t4, a0, 10
+; RV64I-NEXT: and t1, t1, t4
+; RV64I-NEXT: seqz t4, s3
+; RV64I-NEXT: addi t4, t4, -1
+; RV64I-NEXT: srli t6, a0, 9
+; RV64I-NEXT: and t4, t4, t6
+; RV64I-NEXT: ld t6, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t6, t6
+; RV64I-NEXT: addi t6, t6, -1
+; RV64I-NEXT: srli s1, a0, 8
+; RV64I-NEXT: and t6, t6, s1
+; RV64I-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz s1, s1
+; RV64I-NEXT: addi s1, s1, -1
+; RV64I-NEXT: srli s2, a0, 7
+; RV64I-NEXT: and s1, s1, s2
+; RV64I-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz s2, s2
+; RV64I-NEXT: addi s2, s2, -1
+; RV64I-NEXT: srli s3, a0, 6
+; RV64I-NEXT: and s2, s2, s3
+; RV64I-NEXT: seqz s3, s11
+; RV64I-NEXT: addi s3, s3, -1
+; RV64I-NEXT: srli s5, a0, 5
+; RV64I-NEXT: and s3, s3, s5
+; RV64I-NEXT: seqz t3, t3
+; RV64I-NEXT: addi t3, t3, -1
+; RV64I-NEXT: srli s5, a0, 4
+; RV64I-NEXT: and t3, t3, s5
+; RV64I-NEXT: seqz t5, t5
+; RV64I-NEXT: addi t5, t5, -1
+; RV64I-NEXT: srli s5, a0, 3
+; RV64I-NEXT: and t5, t5, s5
+; RV64I-NEXT: seqz t0, t0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: srli s5, a0, 2
+; RV64I-NEXT: and t0, t0, s5
+; RV64I-NEXT: xor s4, s4, s6
+; RV64I-NEXT: ld s5, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s5, s7, s5
+; RV64I-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s6, s9, s6
+; RV64I-NEXT: srli a0, a0, 21
+; RV64I-NEXT: seqz t2, t2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and a0, t2, a0
+; RV64I-NEXT: xor t2, s10, ra
+; RV64I-NEXT: xor a4, a4, a5
+; RV64I-NEXT: xor a5, s1, s2
+; RV64I-NEXT: xor s1, s4, s5
+; RV64I-NEXT: ld s2, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s2, s6, s2
+; RV64I-NEXT: xor a0, a0, a7
+; RV64I-NEXT: xor a3, t2, a3
+; RV64I-NEXT: xor a4, a4, a6
+; RV64I-NEXT: xor a5, a5, s3
+; RV64I-NEXT: xor a6, s1, s2
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: xor a1, a4, t1
+; RV64I-NEXT: xor a3, a5, t3
+; RV64I-NEXT: xor a0, a6, a0
+; RV64I-NEXT: xor a2, a2, s0
+; RV64I-NEXT: xor a1, a1, t4
+; RV64I-NEXT: xor a3, a3, t5
+; RV64I-NEXT: xor a0, a0, a2
+; RV64I-NEXT: xor a1, a1, t6
+; RV64I-NEXT: xor a2, a3, t0
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a0, a0, 32
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32IM-LABEL: clmulh_i32:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -144
-; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi sp, sp, -80
+; RV32IM-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s3, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s4, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s5, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s6, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s7, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s8, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s9, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
; RV32IM-NEXT: srli t0, a0, 8
-; RV32IM-NEXT: lui a3, 16
+; RV32IM-NEXT: lui a6, 16
; RV32IM-NEXT: srli t1, a0, 24
; RV32IM-NEXT: slli a2, a0, 24
-; RV32IM-NEXT: lui s1, 61681
-; RV32IM-NEXT: lui s3, 209715
-; RV32IM-NEXT: lui a6, 349525
-; RV32IM-NEXT: srli t4, a1, 8
-; RV32IM-NEXT: srli t6, a1, 24
-; RV32IM-NEXT: slli a4, a1, 24
-; RV32IM-NEXT: li t3, 1
-; RV32IM-NEXT: lui s11, 2
-; RV32IM-NEXT: lui t2, 4
-; RV32IM-NEXT: lui s10, 8
-; RV32IM-NEXT: lui t5, 32
-; RV32IM-NEXT: lui s0, 64
-; RV32IM-NEXT: lui s2, 128
-; RV32IM-NEXT: lui s4, 256
-; RV32IM-NEXT: lui s5, 512
-; RV32IM-NEXT: lui s6, 1024
-; RV32IM-NEXT: lui s7, 2048
-; RV32IM-NEXT: lui s8, 4096
-; RV32IM-NEXT: lui s9, 8192
-; RV32IM-NEXT: lui ra, 16384
-; RV32IM-NEXT: addi a3, a3, -256
-; RV32IM-NEXT: lui a5, 16
-; RV32IM-NEXT: and t0, t0, a3
-; RV32IM-NEXT: or t1, t0, t1
-; RV32IM-NEXT: lui a7, 32768
-; RV32IM-NEXT: and t4, t4, a3
-; RV32IM-NEXT: or t6, t4, t6
-; RV32IM-NEXT: lui t0, 65536
-; RV32IM-NEXT: and a0, a0, a3
-; RV32IM-NEXT: mv t4, a3
-; RV32IM-NEXT: sw a3, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a4, 61681
+; RV32IM-NEXT: lui t2, 209715
+; RV32IM-NEXT: lui a3, 349525
+; RV32IM-NEXT: srli s0, a1, 8
+; RV32IM-NEXT: srli t4, a1, 24
+; RV32IM-NEXT: slli t6, a1, 24
+; RV32IM-NEXT: li s11, 1
+; RV32IM-NEXT: lui s6, 1
+; RV32IM-NEXT: lui t5, 4
+; RV32IM-NEXT: lui a5, 8
+; RV32IM-NEXT: lui s2, 32
+; RV32IM-NEXT: lui s7, 64
+; RV32IM-NEXT: lui s8, 128
+; RV32IM-NEXT: lui s3, 256
+; RV32IM-NEXT: lui s4, 512
+; RV32IM-NEXT: lui s5, 1024
+; RV32IM-NEXT: lui s9, 4096
+; RV32IM-NEXT: lui s10, 8192
+; RV32IM-NEXT: addi a7, a6, -256
+; RV32IM-NEXT: lui t3, 16
+; RV32IM-NEXT: addi a6, a4, -241
+; RV32IM-NEXT: addi t2, t2, 819
+; RV32IM-NEXT: addi a3, a3, 1365
+; RV32IM-NEXT: slli s11, s11, 11
+; RV32IM-NEXT: and t0, t0, a7
+; RV32IM-NEXT: and a0, a0, a7
+; RV32IM-NEXT: and s0, s0, a7
+; RV32IM-NEXT: and a1, a1, a7
+; RV32IM-NEXT: or t0, t0, t1
; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or a2, a2, a0
-; RV32IM-NEXT: lui a3, 131072
-; RV32IM-NEXT: and a1, a1, t4
+; RV32IM-NEXT: or t1, s0, t4
; RV32IM-NEXT: slli a1, a1, 8
-; RV32IM-NEXT: or a0, a4, a1
-; RV32IM-NEXT: lui a1, 262144
-; RV32IM-NEXT: addi s1, s1, -241
-; RV32IM-NEXT: addi s3, s3, 819
-; RV32IM-NEXT: or a2, a2, t1
-; RV32IM-NEXT: addi a4, a6, 1365
-; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: or a0, a0, t6
-; RV32IM-NEXT: srli a6, a2, 4
-; RV32IM-NEXT: and a2, a2, s1
-; RV32IM-NEXT: and a6, a6, s1
-; RV32IM-NEXT: slli a2, a2, 4
-; RV32IM-NEXT: or a2, a6, a2
-; RV32IM-NEXT: srli a6, a0, 4
-; RV32IM-NEXT: and a0, a0, s1
-; RV32IM-NEXT: and a6, a6, s1
+; RV32IM-NEXT: or a0, a2, a0
+; RV32IM-NEXT: or a1, t6, a1
+; RV32IM-NEXT: or a0, a0, t0
+; RV32IM-NEXT: or a1, a1, t1
+; RV32IM-NEXT: srli t0, a0, 4
+; RV32IM-NEXT: and a0, a0, a6
+; RV32IM-NEXT: srli t1, a1, 4
+; RV32IM-NEXT: and a1, a1, a6
+; RV32IM-NEXT: and t0, t0, a6
; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: or a0, a6, a0
-; RV32IM-NEXT: srli a6, a2, 2
-; RV32IM-NEXT: and a2, a2, s3
-; RV32IM-NEXT: and a6, a6, s3
-; RV32IM-NEXT: slli a2, a2, 2
-; RV32IM-NEXT: or a2, a6, a2
-; RV32IM-NEXT: srli a6, a0, 2
-; RV32IM-NEXT: and a0, a0, s3
-; RV32IM-NEXT: and a6, a6, s3
+; RV32IM-NEXT: and t1, t1, a6
+; RV32IM-NEXT: slli a1, a1, 4
+; RV32IM-NEXT: or a0, t0, a0
+; RV32IM-NEXT: or a1, t1, a1
+; RV32IM-NEXT: srli t0, a0, 2
+; RV32IM-NEXT: and a0, a0, t2
+; RV32IM-NEXT: srli t1, a1, 2
+; RV32IM-NEXT: and a1, a1, t2
+; RV32IM-NEXT: and t0, t0, t2
; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: or a0, a6, a0
-; RV32IM-NEXT: srli a6, a2, 1
-; RV32IM-NEXT: and a2, a2, a4
-; RV32IM-NEXT: and a6, a6, a4
-; RV32IM-NEXT: slli a2, a2, 1
-; RV32IM-NEXT: or a6, a6, a2
-; RV32IM-NEXT: srli a2, a0, 1
-; RV32IM-NEXT: and a0, a0, a4
-; RV32IM-NEXT: and a2, a2, a4
-; RV32IM-NEXT: slli a0, a0, 1
-; RV32IM-NEXT: or a0, a2, a0
-; RV32IM-NEXT: lui a2, 524288
-; RV32IM-NEXT: slli t3, t3, 11
-; RV32IM-NEXT: and t3, a0, t3
-; RV32IM-NEXT: lui a4, 1
-; RV32IM-NEXT: and t4, a0, a4
-; RV32IM-NEXT: and s11, a0, s11
-; RV32IM-NEXT: and a4, a0, t2
-; RV32IM-NEXT: sw a4, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a0, s10
-; RV32IM-NEXT: sw a4, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a5, a0, a5
-; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a0, t5
-; RV32IM-NEXT: sw a4, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s0, a0, s0
-; RV32IM-NEXT: and a4, a0, s2
-; RV32IM-NEXT: sw a4, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s4, a0, s4
-; RV32IM-NEXT: and a4, a0, s5
-; RV32IM-NEXT: sw a4, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a0, s6
-; RV32IM-NEXT: sw a4, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a0, s7
-; RV32IM-NEXT: sw a4, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a0, s8
-; RV32IM-NEXT: sw a4, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a0, s9
-; RV32IM-NEXT: sw a4, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a0, ra
-; RV32IM-NEXT: sw a4, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a0, a7
-; RV32IM-NEXT: sw a4, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a4, a0, t0
-; RV32IM-NEXT: sw a4, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a0, a3
+; RV32IM-NEXT: and t1, t1, t2
+; RV32IM-NEXT: slli a1, a1, 2
+; RV32IM-NEXT: or a0, t0, a0
+; RV32IM-NEXT: or a1, t1, a1
+; RV32IM-NEXT: srli t0, a0, 1
; RV32IM-NEXT: sw a3, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a1, a0, a1
+; RV32IM-NEXT: and a0, a0, a3
+; RV32IM-NEXT: srli t1, a1, 1
+; RV32IM-NEXT: and a1, a1, a3
+; RV32IM-NEXT: and t0, t0, a3
+; RV32IM-NEXT: slli a0, a0, 1
+; RV32IM-NEXT: and t1, t1, a3
+; RV32IM-NEXT: slli a1, a1, 1
+; RV32IM-NEXT: or a0, t0, a0
+; RV32IM-NEXT: or s1, t1, a1
+; RV32IM-NEXT: slli t4, a0, 1
+; RV32IM-NEXT: andi t6, s1, 2
+; RV32IM-NEXT: slli s0, a0, 2
+; RV32IM-NEXT: andi ra, s1, 4
+; RV32IM-NEXT: slli a3, a0, 3
+; RV32IM-NEXT: andi a2, s1, 8
+; RV32IM-NEXT: slli a4, a0, 4
+; RV32IM-NEXT: and t0, s1, s11
+; RV32IM-NEXT: and t1, s1, s6
+; RV32IM-NEXT: mul t0, a0, t0
+; RV32IM-NEXT: mul t1, a0, t1
+; RV32IM-NEXT: xor a1, t0, t1
; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a2, a0, a2
-; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi ra, a0, 2
-; RV32IM-NEXT: andi a1, a0, 1
-; RV32IM-NEXT: andi a2, a0, 4
-; RV32IM-NEXT: andi a3, a0, 8
-; RV32IM-NEXT: andi a4, a0, 16
-; RV32IM-NEXT: andi a5, a0, 32
-; RV32IM-NEXT: andi a7, a0, 64
-; RV32IM-NEXT: andi t0, a0, 128
-; RV32IM-NEXT: andi t1, a0, 256
-; RV32IM-NEXT: andi t2, a0, 512
-; RV32IM-NEXT: andi a0, a0, 1024
-; RV32IM-NEXT: mul ra, a6, ra
-; RV32IM-NEXT: mul s10, a6, a1
-; RV32IM-NEXT: mul s9, a6, a2
-; RV32IM-NEXT: mul s5, a6, a3
-; RV32IM-NEXT: mul s6, a6, a4
-; RV32IM-NEXT: mul s2, a6, a5
-; RV32IM-NEXT: mul a1, a6, a7
+; RV32IM-NEXT: andi s6, s1, 16
+; RV32IM-NEXT: and t0, s1, s7
+; RV32IM-NEXT: and t1, s1, s8
+; RV32IM-NEXT: mul t0, a0, t0
+; RV32IM-NEXT: mul t1, a0, t1
+; RV32IM-NEXT: xor a1, t0, t1
+; RV32IM-NEXT: sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli s7, a0, 5
+; RV32IM-NEXT: and t0, s1, s9
+; RV32IM-NEXT: and s8, s1, s10
+; RV32IM-NEXT: mul t0, a0, t0
+; RV32IM-NEXT: mul s8, a0, s8
+; RV32IM-NEXT: xor a1, t0, s8
+; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi s8, s1, 32
+; RV32IM-NEXT: seqz t6, t6
+; RV32IM-NEXT: addi t6, t6, -1
+; RV32IM-NEXT: and a1, t6, t4
+; RV32IM-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli s9, a0, 6
+; RV32IM-NEXT: seqz t6, ra
+; RV32IM-NEXT: addi t6, t6, -1
+; RV32IM-NEXT: and a1, t6, s0
; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a6, t0
-; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t6, a6, t1
-; RV32IM-NEXT: mul t2, a6, t2
-; RV32IM-NEXT: mul s7, a6, a0
-; RV32IM-NEXT: mul a0, a6, t3
-; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, t4
-; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t1, a6, s11
-; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a7, a6, a0
-; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t5, a6, a0
-; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s8, a6, a0
-; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s0
-; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a6, a0
-; RV32IM-NEXT: mul a2, a6, s4
-; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a5, a6, a0
-; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t3, a6, a0
-; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s4, a6, a0
-; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a6, a0
-; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: lw a4, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a6, a4
-; RV32IM-NEXT: lw t0, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t0, a6, t0
-; RV32IM-NEXT: lw t4, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t4, a6, t4
-; RV32IM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s0, a6, s0
-; RV32IM-NEXT: lw s11, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s11, a6, s11
-; RV32IM-NEXT: sw s11, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a6, a6, s11
-; RV32IM-NEXT: xor s10, s10, ra
-; RV32IM-NEXT: xor s5, s9, s5
-; RV32IM-NEXT: xor s2, s6, s2
-; RV32IM-NEXT: xor t2, t6, t2
-; RV32IM-NEXT: xor a7, t1, a7
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: xor a1, s10, s5
-; RV32IM-NEXT: lw a3, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, s2, a3
-; RV32IM-NEXT: xor t1, t2, s7
-; RV32IM-NEXT: xor a7, a7, t5
-; RV32IM-NEXT: xor a2, a2, a5
-; RV32IM-NEXT: xor a0, a0, a4
-; RV32IM-NEXT: xor a1, a1, a3
-; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, t1, a3
-; RV32IM-NEXT: xor a4, a7, s8
-; RV32IM-NEXT: xor a2, a2, t3
-; RV32IM-NEXT: xor a0, a0, t0
-; RV32IM-NEXT: lw a5, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a5
-; RV32IM-NEXT: lw a5, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a5
-; RV32IM-NEXT: lw a5, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: xor a2, a2, s4
-; RV32IM-NEXT: xor a0, a0, t4
-; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: xor a0, a0, s0
-; RV32IM-NEXT: lui a5, 349525
-; RV32IM-NEXT: addi a5, a5, 1364
-; RV32IM-NEXT: xor a3, a1, a3
-; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: andi s10, s1, 64
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: and s0, a2, a3
+; RV32IM-NEXT: slli a2, a0, 7
+; RV32IM-NEXT: seqz a3, s6
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and s6, a3, a4
+; RV32IM-NEXT: andi a3, s1, 128
+; RV32IM-NEXT: seqz a4, s8
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: and s8, a4, s7
+; RV32IM-NEXT: slli a4, a0, 8
+; RV32IM-NEXT: seqz s7, s10
+; RV32IM-NEXT: addi s7, s7, -1
+; RV32IM-NEXT: and s7, s7, s9
+; RV32IM-NEXT: andi s10, s1, 256
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and s9, a3, a2
+; RV32IM-NEXT: slli a2, a0, 9
+; RV32IM-NEXT: seqz a3, s10
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and s11, a3, a4
+; RV32IM-NEXT: andi a3, s1, 512
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and s10, a3, a2
+; RV32IM-NEXT: lui a2, 2048
+; RV32IM-NEXT: lui a3, 2
+; RV32IM-NEXT: and a3, s1, a3
+; RV32IM-NEXT: and a4, s1, t5
+; RV32IM-NEXT: and a5, s1, a5
+; RV32IM-NEXT: and t5, s1, t3
+; RV32IM-NEXT: and s2, s1, s2
+; RV32IM-NEXT: and s3, s1, s3
+; RV32IM-NEXT: and s4, s1, s4
+; RV32IM-NEXT: and s5, s1, s5
+; RV32IM-NEXT: and a2, s1, a2
+; RV32IM-NEXT: lui ra, 16384
+; RV32IM-NEXT: and ra, s1, ra
+; RV32IM-NEXT: lui t3, 32768
+; RV32IM-NEXT: and t3, s1, t3
+; RV32IM-NEXT: lui t1, 65536
+; RV32IM-NEXT: and t1, s1, t1
+; RV32IM-NEXT: lui t0, 131072
+; RV32IM-NEXT: and t0, s1, t0
+; RV32IM-NEXT: lui t4, 262144
+; RV32IM-NEXT: and t4, s1, t4
+; RV32IM-NEXT: lui t6, 524288
+; RV32IM-NEXT: and t6, s1, t6
+; RV32IM-NEXT: andi a1, s1, 1
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: mul a3, a0, a3
+; RV32IM-NEXT: mul a4, a0, a4
+; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: mul t5, a0, t5
+; RV32IM-NEXT: mul s2, a0, s2
+; RV32IM-NEXT: mul s3, a0, s3
+; RV32IM-NEXT: mul s4, a0, s4
+; RV32IM-NEXT: mul s5, a0, s5
+; RV32IM-NEXT: mul a2, a0, a2
+; RV32IM-NEXT: mul ra, a0, ra
+; RV32IM-NEXT: mul t3, a0, t3
+; RV32IM-NEXT: mul t1, a0, t1
+; RV32IM-NEXT: mul t0, a0, t0
+; RV32IM-NEXT: mul t4, a0, t4
+; RV32IM-NEXT: mul t6, a0, t6
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: and a1, a1, a0
+; RV32IM-NEXT: slli a0, a0, 10
+; RV32IM-NEXT: andi s1, s1, 1024
+; RV32IM-NEXT: seqz s1, s1
+; RV32IM-NEXT: addi s1, s1, -1
+; RV32IM-NEXT: and a0, s1, a0
+; RV32IM-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, s1, a3
+; RV32IM-NEXT: lw s1, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s1, s3
+; RV32IM-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, s3, ra
+; RV32IM-NEXT: lw ra, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, ra
+; RV32IM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, ra, s0
+; RV32IM-NEXT: xor s6, s6, s8
+; RV32IM-NEXT: xor s8, s9, s11
; RV32IM-NEXT: xor a3, a3, a4
-; RV32IM-NEXT: lw a4, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a4
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: xor a0, a0, a6
-; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a3, a2, a6
-; RV32IM-NEXT: srli a4, a2, 8
-; RV32IM-NEXT: xor a0, a2, a0
-; RV32IM-NEXT: slli a3, a3, 8
-; RV32IM-NEXT: and a2, a4, a6
+; RV32IM-NEXT: xor a4, s1, s4
+; RV32IM-NEXT: xor t3, s3, t3
+; RV32IM-NEXT: xor a1, a1, s0
+; RV32IM-NEXT: xor s0, s6, s7
+; RV32IM-NEXT: xor s1, s8, s10
+; RV32IM-NEXT: xor a3, a3, a5
+; RV32IM-NEXT: xor a4, a4, s5
+; RV32IM-NEXT: xor a5, t3, t1
+; RV32IM-NEXT: xor a1, a1, s0
+; RV32IM-NEXT: xor a0, s1, a0
+; RV32IM-NEXT: xor a3, a3, t5
+; RV32IM-NEXT: xor a2, a4, a2
+; RV32IM-NEXT: xor a4, a5, t0
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lui a1, 349525
+; RV32IM-NEXT: addi a1, a1, 1364
+; RV32IM-NEXT: xor a3, a3, s2
+; RV32IM-NEXT: xor a4, a4, t4
+; RV32IM-NEXT: slli a5, a0, 24
+; RV32IM-NEXT: xor a0, a0, a3
+; RV32IM-NEXT: xor a3, a4, t6
+; RV32IM-NEXT: xor a0, a0, a2
+; RV32IM-NEXT: and a2, a0, a7
+; RV32IM-NEXT: srli a4, a0, 8
+; RV32IM-NEXT: xor a0, a0, a3
+; RV32IM-NEXT: slli a2, a2, 8
+; RV32IM-NEXT: and a3, a4, a7
; RV32IM-NEXT: srli a0, a0, 24
-; RV32IM-NEXT: or a1, a1, a3
+; RV32IM-NEXT: or a2, a5, a2
+; RV32IM-NEXT: or a0, a3, a0
; RV32IM-NEXT: or a0, a2, a0
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: srli a1, a0, 4
-; RV32IM-NEXT: and a0, a0, s1
-; RV32IM-NEXT: and a1, a1, s1
+; RV32IM-NEXT: srli a2, a0, 4
+; RV32IM-NEXT: and a0, a0, a6
+; RV32IM-NEXT: and a2, a2, a6
; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: srli a1, a0, 2
-; RV32IM-NEXT: and a0, a0, s3
-; RV32IM-NEXT: and a1, a1, s3
+; RV32IM-NEXT: or a0, a2, a0
+; RV32IM-NEXT: srli a2, a0, 2
+; RV32IM-NEXT: and a0, a0, t2
+; RV32IM-NEXT: and a2, a2, t2
; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: srli a1, a0, 1
-; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a0, a0, a2
-; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: or a0, a2, a0
+; RV32IM-NEXT: srli a2, a0, 1
+; RV32IM-NEXT: lw a3, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a0, a0, a3
+; RV32IM-NEXT: and a1, a2, a1
; RV32IM-NEXT: slli a0, a0, 1
; RV32IM-NEXT: or a0, a1, a0
; RV32IM-NEXT: srli a0, a0, 1
-; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 144
+; RV32IM-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s2, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s3, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s4, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s6, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s7, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s8, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s9, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s11, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 80
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: clmulh_i32:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: addi sp, sp, -128
-; RV64IM-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a6, a0, 32
-; RV64IM-NEXT: andi t1, a1, 2
-; RV64IM-NEXT: andi t3, a1, 1
-; RV64IM-NEXT: andi a5, a1, 4
-; RV64IM-NEXT: andi a7, a1, 8
-; RV64IM-NEXT: andi a3, a1, 16
-; RV64IM-NEXT: andi a4, a1, 32
-; RV64IM-NEXT: andi a0, a1, 64
-; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi t0, a1, 128
-; RV64IM-NEXT: andi t2, a1, 256
-; RV64IM-NEXT: andi a0, a1, 512
-; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: li a2, 1
-; RV64IM-NEXT: lui t5, 1
-; RV64IM-NEXT: lui t6, 2
-; RV64IM-NEXT: lui s0, 4
-; RV64IM-NEXT: lui s2, 8
-; RV64IM-NEXT: lui s3, 16
-; RV64IM-NEXT: lui s4, 32
-; RV64IM-NEXT: lui s5, 64
-; RV64IM-NEXT: lui s6, 128
-; RV64IM-NEXT: lui s7, 256
-; RV64IM-NEXT: lui s8, 512
-; RV64IM-NEXT: lui s9, 1024
-; RV64IM-NEXT: lui s10, 2048
-; RV64IM-NEXT: lui s11, 4096
-; RV64IM-NEXT: lui ra, 8192
-; RV64IM-NEXT: lui a0, 16384
-; RV64IM-NEXT: srli s1, a6, 32
-; RV64IM-NEXT: mul a6, s1, t1
-; RV64IM-NEXT: mul t1, s1, t3
-; RV64IM-NEXT: xor a6, t1, a6
-; RV64IM-NEXT: sd a6, 0(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui t1, 32768
-; RV64IM-NEXT: mul a5, s1, a5
-; RV64IM-NEXT: mul a7, s1, a7
-; RV64IM-NEXT: xor t4, a5, a7
-; RV64IM-NEXT: lui a7, 65536
-; RV64IM-NEXT: mul a3, s1, a3
-; RV64IM-NEXT: mul a4, s1, a4
-; RV64IM-NEXT: xor a6, a3, a4
-; RV64IM-NEXT: lui t3, 131072
-; RV64IM-NEXT: mul a4, s1, t0
-; RV64IM-NEXT: mul t0, s1, t2
-; RV64IM-NEXT: xor a5, a4, t0
-; RV64IM-NEXT: lui t0, 262144
-; RV64IM-NEXT: slli t2, a2, 11
-; RV64IM-NEXT: and t5, a1, t5
-; RV64IM-NEXT: and t6, a1, t6
+; RV64IM-NEXT: addi sp, sp, -16
+; RV64IM-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a0, a0, 32
+; RV64IM-NEXT: andi a2, a1, 2
+; RV64IM-NEXT: andi a3, a1, 4
+; RV64IM-NEXT: andi a4, a1, 8
+; RV64IM-NEXT: andi a6, a1, 16
+; RV64IM-NEXT: andi a7, a1, 32
+; RV64IM-NEXT: andi t1, a1, 64
+; RV64IM-NEXT: andi t2, a1, 128
+; RV64IM-NEXT: andi t3, a1, 256
+; RV64IM-NEXT: andi t4, a1, 512
+; RV64IM-NEXT: andi t6, a1, 1024
+; RV64IM-NEXT: lui t5, 16
+; RV64IM-NEXT: lui s0, 32
+; RV64IM-NEXT: srli a5, a0, 31
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: srli a2, a0, 30
+; RV64IM-NEXT: seqz a3, a3
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and t0, a3, a2
+; RV64IM-NEXT: srli a2, a0, 29
+; RV64IM-NEXT: seqz a3, a4
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and a4, a3, a2
+; RV64IM-NEXT: srli a2, a0, 28
+; RV64IM-NEXT: seqz a3, a6
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and a6, a3, a2
+; RV64IM-NEXT: srli a2, a0, 27
+; RV64IM-NEXT: seqz a3, a7
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and s1, a3, a2
+; RV64IM-NEXT: srli a2, a0, 26
+; RV64IM-NEXT: seqz a3, t1
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and a2, a3, a2
+; RV64IM-NEXT: srli a3, a0, 25
+; RV64IM-NEXT: seqz a7, t2
+; RV64IM-NEXT: addi a7, a7, -1
+; RV64IM-NEXT: and t1, a7, a3
+; RV64IM-NEXT: srli a3, a0, 24
+; RV64IM-NEXT: seqz a7, t3
+; RV64IM-NEXT: addi a7, a7, -1
+; RV64IM-NEXT: and t2, a7, a3
+; RV64IM-NEXT: srli a3, a0, 23
+; RV64IM-NEXT: seqz a7, t4
+; RV64IM-NEXT: addi a7, a7, -1
+; RV64IM-NEXT: and a7, a7, a3
+; RV64IM-NEXT: srli a3, a0, 22
+; RV64IM-NEXT: seqz t3, t6
+; RV64IM-NEXT: addi t3, t3, -1
+; RV64IM-NEXT: and a3, t3, a3
+; RV64IM-NEXT: lui t3, 2048
+; RV64IM-NEXT: srli a0, a0, 32
+; RV64IM-NEXT: and t4, a1, t5
; RV64IM-NEXT: and s0, a1, s0
-; RV64IM-NEXT: and s2, a1, s2
-; RV64IM-NEXT: and s3, a1, s3
-; RV64IM-NEXT: and s4, a1, s4
-; RV64IM-NEXT: and s5, a1, s5
-; RV64IM-NEXT: and s6, a1, s6
-; RV64IM-NEXT: and s7, a1, s7
-; RV64IM-NEXT: and s8, a1, s8
-; RV64IM-NEXT: and s9, a1, s9
-; RV64IM-NEXT: and s10, a1, s10
-; RV64IM-NEXT: and s11, a1, s11
-; RV64IM-NEXT: and ra, a1, ra
-; RV64IM-NEXT: and a2, a1, a0
-; RV64IM-NEXT: and t1, a1, t1
-; RV64IM-NEXT: and a7, a1, a7
+; RV64IM-NEXT: mul t4, a0, t4
+; RV64IM-NEXT: mul t5, a0, s0
+; RV64IM-NEXT: xor t4, t4, t5
+; RV64IM-NEXT: lui t5, 4096
; RV64IM-NEXT: and t3, a1, t3
+; RV64IM-NEXT: and t5, a1, t5
+; RV64IM-NEXT: mul t3, a0, t3
+; RV64IM-NEXT: mul t5, a0, t5
+; RV64IM-NEXT: xor t3, t3, t5
+; RV64IM-NEXT: andi t5, a1, 1
+; RV64IM-NEXT: seqz t5, t5
+; RV64IM-NEXT: addi t5, t5, -1
+; RV64IM-NEXT: and t5, t5, a0
+; RV64IM-NEXT: xor a5, t5, a5
+; RV64IM-NEXT: xor a4, t0, a4
+; RV64IM-NEXT: xor a6, a6, s1
+; RV64IM-NEXT: li t0, 1
+; RV64IM-NEXT: xor t1, t1, t2
+; RV64IM-NEXT: lui t2, 1
+; RV64IM-NEXT: slli t0, t0, 11
+; RV64IM-NEXT: and t2, a1, t2
; RV64IM-NEXT: and t0, a1, t0
+; RV64IM-NEXT: mul t2, a0, t2
+; RV64IM-NEXT: mul t0, a0, t0
+; RV64IM-NEXT: xor t0, t0, t2
+; RV64IM-NEXT: lui t2, 64
; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: andi a0, a1, 1024
-; RV64IM-NEXT: srliw a1, a1, 31
-; RV64IM-NEXT: slli a1, a1, 31
-; RV64IM-NEXT: ld a3, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, s1, a3
-; RV64IM-NEXT: ld a4, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, s1, a4
-; RV64IM-NEXT: mul a0, s1, a0
-; RV64IM-NEXT: mul t5, s1, t5
-; RV64IM-NEXT: mul t6, s1, t6
-; RV64IM-NEXT: mul s0, s1, s0
-; RV64IM-NEXT: mul s2, s1, s2
-; RV64IM-NEXT: mul s3, s1, s3
-; RV64IM-NEXT: mul s4, s1, s4
-; RV64IM-NEXT: mul s5, s1, s5
-; RV64IM-NEXT: mul s6, s1, s6
-; RV64IM-NEXT: mul s7, s1, s7
-; RV64IM-NEXT: mul s8, s1, s8
-; RV64IM-NEXT: mul s9, s1, s9
-; RV64IM-NEXT: mul s10, s1, s10
-; RV64IM-NEXT: mul s11, s1, s11
-; RV64IM-NEXT: mul ra, s1, ra
-; RV64IM-NEXT: mul a2, s1, a2
-; RV64IM-NEXT: mul t1, s1, t1
-; RV64IM-NEXT: mul a7, s1, a7
-; RV64IM-NEXT: mul t3, s1, t3
-; RV64IM-NEXT: mul t0, s1, t0
-; RV64IM-NEXT: mul a1, s1, a1
-; RV64IM-NEXT: mul t2, s1, t2
-; RV64IM-NEXT: xor s1, s2, s3
-; RV64IM-NEXT: xor s2, s8, s9
-; RV64IM-NEXT: xor a7, a7, t3
-; RV64IM-NEXT: ld t3, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul t2, a0, t2
+; RV64IM-NEXT: xor t2, t4, t2
+; RV64IM-NEXT: lui t4, 8192
+; RV64IM-NEXT: and t4, a1, t4
+; RV64IM-NEXT: mul t4, a0, t4
; RV64IM-NEXT: xor t3, t3, t4
-; RV64IM-NEXT: xor a3, a6, a3
; RV64IM-NEXT: xor a4, a5, a4
-; RV64IM-NEXT: xor a5, t2, t5
-; RV64IM-NEXT: xor a6, s1, s4
-; RV64IM-NEXT: xor t2, s2, s10
-; RV64IM-NEXT: xor a7, a7, t0
-; RV64IM-NEXT: xor a3, t3, a3
-; RV64IM-NEXT: xor a0, a4, a0
-; RV64IM-NEXT: xor a4, a5, t6
-; RV64IM-NEXT: xor a5, a6, s5
-; RV64IM-NEXT: xor a6, t2, s11
-; RV64IM-NEXT: xor a0, a3, a0
-; RV64IM-NEXT: xor a4, a4, s0
-; RV64IM-NEXT: xor a3, a5, s6
-; RV64IM-NEXT: xor a5, a6, ra
-; RV64IM-NEXT: xor a0, a0, a4
-; RV64IM-NEXT: xor a3, a3, s7
-; RV64IM-NEXT: xor a2, a5, a2
-; RV64IM-NEXT: xor a0, a0, a3
-; RV64IM-NEXT: xor a2, a2, t1
-; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: xor a1, a7, a1
-; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: xor a2, a6, a2
+; RV64IM-NEXT: xor a5, t1, a7
+; RV64IM-NEXT: lui a6, 2
+; RV64IM-NEXT: and a6, a1, a6
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: xor a6, t0, a6
+; RV64IM-NEXT: lui a7, 128
+; RV64IM-NEXT: and a7, a1, a7
+; RV64IM-NEXT: mul a7, a0, a7
+; RV64IM-NEXT: xor a7, t2, a7
+; RV64IM-NEXT: lui t0, 16384
+; RV64IM-NEXT: and t0, a1, t0
+; RV64IM-NEXT: mul t0, a0, t0
+; RV64IM-NEXT: xor t0, t3, t0
+; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: xor a3, a5, a3
+; RV64IM-NEXT: lui a4, 4
+; RV64IM-NEXT: and a4, a1, a4
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: xor a4, a6, a4
+; RV64IM-NEXT: lui a5, 256
+; RV64IM-NEXT: and a5, a1, a5
+; RV64IM-NEXT: mul a5, a0, a5
+; RV64IM-NEXT: xor a5, a7, a5
+; RV64IM-NEXT: lui a6, 32768
+; RV64IM-NEXT: and a6, a1, a6
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: xor a6, t0, a6
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: lui a3, 8
+; RV64IM-NEXT: and a3, a1, a3
+; RV64IM-NEXT: mul a3, a0, a3
+; RV64IM-NEXT: xor a3, a4, a3
+; RV64IM-NEXT: lui a4, 512
+; RV64IM-NEXT: and a4, a1, a4
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: xor a4, a5, a4
+; RV64IM-NEXT: lui a5, 65536
+; RV64IM-NEXT: and a5, a1, a5
+; RV64IM-NEXT: mul a5, a0, a5
+; RV64IM-NEXT: xor a5, a6, a5
+; RV64IM-NEXT: lui a6, 1024
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: lui a3, 131072
+; RV64IM-NEXT: and a6, a1, a6
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: xor a4, a4, a6
+; RV64IM-NEXT: lui a6, 262144
+; RV64IM-NEXT: and a3, a1, a3
+; RV64IM-NEXT: and a6, a1, a6
+; RV64IM-NEXT: srliw a1, a1, 31
+; RV64IM-NEXT: slli a1, a1, 31
+; RV64IM-NEXT: mul a3, a0, a3
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: xor a3, a5, a3
+; RV64IM-NEXT: xor a2, a2, a4
+; RV64IM-NEXT: xor a3, a3, a6
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: mul a0, a0, a1
+; RV64IM-NEXT: xor a0, a2, a0
; RV64IM-NEXT: srli a0, a0, 32
-; RV64IM-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: addi sp, sp, 128
+; RV64IM-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 16
; RV64IM-NEXT: ret
;
; RV32IMZBS-LABEL: clmulh_i32:
; RV32IMZBS: # %bb.0:
-; RV32IMZBS-NEXT: addi sp, sp, -144
-; RV32IMZBS-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli t0, a0, 8
+; RV32IMZBS-NEXT: addi sp, sp, -96
+; RV32IMZBS-NEXT: sw ra, 92(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s0, 88(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s1, 84(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s2, 80(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s3, 76(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s4, 72(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s5, 68(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s6, 64(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s7, 60(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s8, 56(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s9, 52(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s10, 48(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s11, 44(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: srli a2, a0, 8
; RV32IMZBS-NEXT: lui a3, 16
-; RV32IMZBS-NEXT: srli t1, a0, 24
-; RV32IMZBS-NEXT: slli a2, a0, 24
-; RV32IMZBS-NEXT: lui s1, 61681
-; RV32IMZBS-NEXT: lui a6, 209715
-; RV32IMZBS-NEXT: lui s11, 349525
-; RV32IMZBS-NEXT: srli t4, a1, 8
-; RV32IMZBS-NEXT: srli t6, a1, 24
-; RV32IMZBS-NEXT: slli a4, a1, 24
-; RV32IMZBS-NEXT: bseti a7, zero, 11
-; RV32IMZBS-NEXT: lui t2, 4
-; RV32IMZBS-NEXT: lui s10, 8
-; RV32IMZBS-NEXT: lui t5, 32
-; RV32IMZBS-NEXT: lui s0, 64
-; RV32IMZBS-NEXT: lui s2, 128
-; RV32IMZBS-NEXT: lui s4, 256
-; RV32IMZBS-NEXT: lui s5, 512
-; RV32IMZBS-NEXT: lui s6, 1024
-; RV32IMZBS-NEXT: lui s7, 2048
-; RV32IMZBS-NEXT: lui s8, 4096
-; RV32IMZBS-NEXT: lui s9, 8192
-; RV32IMZBS-NEXT: lui ra, 16384
-; RV32IMZBS-NEXT: addi s3, a3, -256
-; RV32IMZBS-NEXT: sw s3, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t3, 16
-; RV32IMZBS-NEXT: and t0, t0, s3
-; RV32IMZBS-NEXT: or t1, t0, t1
-; RV32IMZBS-NEXT: lui a5, 32768
-; RV32IMZBS-NEXT: and t4, t4, s3
-; RV32IMZBS-NEXT: or t6, t4, t6
-; RV32IMZBS-NEXT: lui t0, 65536
-; RV32IMZBS-NEXT: and a0, a0, s3
+; RV32IMZBS-NEXT: srli a4, a0, 24
+; RV32IMZBS-NEXT: slli a5, a0, 24
+; RV32IMZBS-NEXT: lui a7, 61681
+; RV32IMZBS-NEXT: lui t0, 209715
+; RV32IMZBS-NEXT: lui a6, 349525
+; RV32IMZBS-NEXT: srli t1, a1, 8
+; RV32IMZBS-NEXT: srli t2, a1, 24
+; RV32IMZBS-NEXT: slli t3, a1, 24
+; RV32IMZBS-NEXT: addi t5, a3, -256
+; RV32IMZBS-NEXT: sw t5, 40(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: addi t4, a7, -241
+; RV32IMZBS-NEXT: sw t4, 36(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: addi a7, t0, 819
+; RV32IMZBS-NEXT: addi a6, a6, 1365
+; RV32IMZBS-NEXT: and a2, a2, t5
+; RV32IMZBS-NEXT: and a0, a0, t5
+; RV32IMZBS-NEXT: and a3, t1, t5
+; RV32IMZBS-NEXT: and a1, a1, t5
+; RV32IMZBS-NEXT: or a2, a2, a4
; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a2, a2, a0
-; RV32IMZBS-NEXT: lui a3, 131072
-; RV32IMZBS-NEXT: and a1, a1, s3
+; RV32IMZBS-NEXT: or a3, a3, t2
; RV32IMZBS-NEXT: slli a1, a1, 8
-; RV32IMZBS-NEXT: or a0, a4, a1
-; RV32IMZBS-NEXT: lui a1, 262144
-; RV32IMZBS-NEXT: addi s1, s1, -241
-; RV32IMZBS-NEXT: addi s3, a6, 819
-; RV32IMZBS-NEXT: or a2, a2, t1
-; RV32IMZBS-NEXT: addi a4, s11, 1365
-; RV32IMZBS-NEXT: sw a4, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: or a0, a0, t6
-; RV32IMZBS-NEXT: srli a6, a2, 4
-; RV32IMZBS-NEXT: and a2, a2, s1
-; RV32IMZBS-NEXT: and a6, a6, s1
-; RV32IMZBS-NEXT: slli a2, a2, 4
-; RV32IMZBS-NEXT: or a2, a6, a2
-; RV32IMZBS-NEXT: srli a6, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, s1
-; RV32IMZBS-NEXT: and a6, a6, s1
+; RV32IMZBS-NEXT: or a0, a5, a0
+; RV32IMZBS-NEXT: or a1, t3, a1
+; RV32IMZBS-NEXT: or a0, a0, a2
+; RV32IMZBS-NEXT: or a1, a1, a3
+; RV32IMZBS-NEXT: srli a2, a0, 4
+; RV32IMZBS-NEXT: and a0, a0, t4
+; RV32IMZBS-NEXT: srli a3, a1, 4
+; RV32IMZBS-NEXT: and a1, a1, t4
+; RV32IMZBS-NEXT: and a2, a2, t4
; RV32IMZBS-NEXT: slli a0, a0, 4
-; RV32IMZBS-NEXT: or a0, a6, a0
-; RV32IMZBS-NEXT: srli a6, a2, 2
-; RV32IMZBS-NEXT: and a2, a2, s3
-; RV32IMZBS-NEXT: and a6, a6, s3
-; RV32IMZBS-NEXT: slli a2, a2, 2
-; RV32IMZBS-NEXT: or a2, a6, a2
-; RV32IMZBS-NEXT: srli a6, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, s3
-; RV32IMZBS-NEXT: and a6, a6, s3
+; RV32IMZBS-NEXT: and a3, a3, t4
+; RV32IMZBS-NEXT: slli a1, a1, 4
+; RV32IMZBS-NEXT: or a0, a2, a0
+; RV32IMZBS-NEXT: or a1, a3, a1
+; RV32IMZBS-NEXT: srli a2, a0, 2
+; RV32IMZBS-NEXT: sw a7, 32(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a7
+; RV32IMZBS-NEXT: srli a3, a1, 2
+; RV32IMZBS-NEXT: and a1, a1, a7
+; RV32IMZBS-NEXT: and a2, a2, a7
; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: or a0, a6, a0
-; RV32IMZBS-NEXT: srli a6, a2, 1
-; RV32IMZBS-NEXT: and a2, a2, a4
-; RV32IMZBS-NEXT: and a6, a6, a4
-; RV32IMZBS-NEXT: slli a2, a2, 1
-; RV32IMZBS-NEXT: or a6, a6, a2
+; RV32IMZBS-NEXT: and a3, a3, a7
+; RV32IMZBS-NEXT: slli a1, a1, 2
+; RV32IMZBS-NEXT: or a0, a2, a0
+; RV32IMZBS-NEXT: or a1, a3, a1
; RV32IMZBS-NEXT: srli a2, a0, 1
-; RV32IMZBS-NEXT: and a0, a0, a4
-; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a6, 28(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a6
+; RV32IMZBS-NEXT: srli a3, a1, 1
+; RV32IMZBS-NEXT: and a1, a1, a6
+; RV32IMZBS-NEXT: and a4, a2, a6
; RV32IMZBS-NEXT: slli a0, a0, 1
-; RV32IMZBS-NEXT: or a0, a2, a0
-; RV32IMZBS-NEXT: lui a2, 524288
-; RV32IMZBS-NEXT: and t4, a0, a7
-; RV32IMZBS-NEXT: lui a4, 1
-; RV32IMZBS-NEXT: and s11, a0, a4
-; RV32IMZBS-NEXT: lui a4, 2
-; RV32IMZBS-NEXT: and a4, a0, a4
-; RV32IMZBS-NEXT: sw a4, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a0, t2
-; RV32IMZBS-NEXT: sw a4, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a0, s10
-; RV32IMZBS-NEXT: sw a4, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t3, a0, t3
-; RV32IMZBS-NEXT: and a4, a0, t5
-; RV32IMZBS-NEXT: sw a4, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s0, a0, s0
-; RV32IMZBS-NEXT: and a4, a0, s2
-; RV32IMZBS-NEXT: sw a4, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s4, a0, s4
-; RV32IMZBS-NEXT: and a4, a0, s5
-; RV32IMZBS-NEXT: sw a4, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a0, s6
-; RV32IMZBS-NEXT: sw a4, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a0, s7
-; RV32IMZBS-NEXT: sw a4, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a0, s8
-; RV32IMZBS-NEXT: sw a4, 44(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a0, s9
-; RV32IMZBS-NEXT: sw a4, 40(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a0, ra
-; RV32IMZBS-NEXT: sw a4, 36(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a5, a0, a5
-; RV32IMZBS-NEXT: sw a5, 32(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a4, a0, t0
-; RV32IMZBS-NEXT: sw a4, 28(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 24(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a2, a0, a2
-; RV32IMZBS-NEXT: sw a2, 16(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi ra, a0, 2
-; RV32IMZBS-NEXT: andi a1, a0, 1
-; RV32IMZBS-NEXT: andi a2, a0, 4
-; RV32IMZBS-NEXT: andi a3, a0, 8
-; RV32IMZBS-NEXT: andi a4, a0, 16
-; RV32IMZBS-NEXT: andi a5, a0, 32
-; RV32IMZBS-NEXT: andi a7, a0, 64
-; RV32IMZBS-NEXT: andi t0, a0, 128
-; RV32IMZBS-NEXT: andi t1, a0, 256
-; RV32IMZBS-NEXT: andi t2, a0, 512
-; RV32IMZBS-NEXT: andi a0, a0, 1024
-; RV32IMZBS-NEXT: mul ra, a6, ra
-; RV32IMZBS-NEXT: mul s10, a6, a1
-; RV32IMZBS-NEXT: mul s9, a6, a2
-; RV32IMZBS-NEXT: mul s5, a6, a3
-; RV32IMZBS-NEXT: mul s6, a6, a4
-; RV32IMZBS-NEXT: mul s2, a6, a5
-; RV32IMZBS-NEXT: mul a1, a6, a7
-; RV32IMZBS-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a6, t0
-; RV32IMZBS-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t6, a6, t1
-; RV32IMZBS-NEXT: mul t2, a6, t2
-; RV32IMZBS-NEXT: mul s7, a6, a0
-; RV32IMZBS-NEXT: mul a0, a6, t4
-; RV32IMZBS-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s11
+; RV32IMZBS-NEXT: and a3, a3, a6
+; RV32IMZBS-NEXT: slli a2, a1, 1
+; RV32IMZBS-NEXT: or s2, a4, a0
+; RV32IMZBS-NEXT: or a1, a3, a2
+; RV32IMZBS-NEXT: srli a2, a2, 31
+; RV32IMZBS-NEXT: slli a3, s2, 1
+; RV32IMZBS-NEXT: andi a4, a1, 2
+; RV32IMZBS-NEXT: slli a5, s2, 2
+; RV32IMZBS-NEXT: andi t1, a1, 4
+; RV32IMZBS-NEXT: slli a0, s2, 3
+; RV32IMZBS-NEXT: andi t4, a1, 8
+; RV32IMZBS-NEXT: slli t5, s2, 4
+; RV32IMZBS-NEXT: andi t6, a1, 16
+; RV32IMZBS-NEXT: slli s0, s2, 5
+; RV32IMZBS-NEXT: andi s1, a1, 32
+; RV32IMZBS-NEXT: slli a7, s2, 6
+; RV32IMZBS-NEXT: andi s3, a1, 64
+; RV32IMZBS-NEXT: slli s4, s2, 7
+; RV32IMZBS-NEXT: andi s5, a1, 128
+; RV32IMZBS-NEXT: slli s6, s2, 8
+; RV32IMZBS-NEXT: andi s7, a1, 256
+; RV32IMZBS-NEXT: slli s8, s2, 9
+; RV32IMZBS-NEXT: andi s9, a1, 512
+; RV32IMZBS-NEXT: slli s10, s2, 10
+; RV32IMZBS-NEXT: slli t3, s2, 31
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a2, a2, t3
+; RV32IMZBS-NEXT: sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a6, a1, 1024
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a3, a4, a3
+; RV32IMZBS-NEXT: sw a3, 20(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, s2, 11
+; RV32IMZBS-NEXT: seqz a4, t1
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 16(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not t3, a1
+; RV32IMZBS-NEXT: seqz a4, t4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a0, a4, a0
; RV32IMZBS-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t1, a6, a0
-; RV32IMZBS-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a7, a6, a0
-; RV32IMZBS-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t5, a6, a0
-; RV32IMZBS-NEXT: mul s8, a6, t3
-; RV32IMZBS-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s0
-; RV32IMZBS-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a0
-; RV32IMZBS-NEXT: mul a2, a6, s4
-; RV32IMZBS-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a5, a6, a0
-; RV32IMZBS-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t3, a6, a0
-; RV32IMZBS-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s4, a6, a0
-; RV32IMZBS-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a6, a0
-; RV32IMZBS-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: lw a4, 36(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a6, a4
-; RV32IMZBS-NEXT: lw t0, 32(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a6, t0
-; RV32IMZBS-NEXT: lw t4, 28(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t4, a6, t4
-; RV32IMZBS-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s0, a6, s0
-; RV32IMZBS-NEXT: lw s11, 20(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s11, a6, s11
-; RV32IMZBS-NEXT: sw s11, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s11, 16(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a6, a6, s11
-; RV32IMZBS-NEXT: xor s10, s10, ra
-; RV32IMZBS-NEXT: xor s5, s9, s5
-; RV32IMZBS-NEXT: xor s2, s6, s2
-; RV32IMZBS-NEXT: xor t2, t6, t2
-; RV32IMZBS-NEXT: xor a7, t1, a7
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: xor a1, s10, s5
-; RV32IMZBS-NEXT: lw a3, 4(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, s2, a3
-; RV32IMZBS-NEXT: xor t1, t2, s7
-; RV32IMZBS-NEXT: xor a7, a7, t5
-; RV32IMZBS-NEXT: xor a2, a2, a5
-; RV32IMZBS-NEXT: xor a0, a0, a4
-; RV32IMZBS-NEXT: xor a1, a1, a3
-; RV32IMZBS-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, t1, a3
-; RV32IMZBS-NEXT: xor a4, a7, s8
-; RV32IMZBS-NEXT: xor a2, a2, t3
-; RV32IMZBS-NEXT: xor a0, a0, t0
-; RV32IMZBS-NEXT: lw a5, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a5
-; RV32IMZBS-NEXT: lw a5, 12(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a5
-; RV32IMZBS-NEXT: lw a5, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: xor a2, a2, s4
-; RV32IMZBS-NEXT: xor a0, a0, t4
-; RV32IMZBS-NEXT: lw a5, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: xor a0, a0, s0
-; RV32IMZBS-NEXT: lui a5, 349525
-; RV32IMZBS-NEXT: addi a5, a5, 1364
-; RV32IMZBS-NEXT: xor a3, a1, a3
-; RV32IMZBS-NEXT: slli a1, a1, 24
+; RV32IMZBS-NEXT: slli a4, s2, 12
+; RV32IMZBS-NEXT: seqz a5, t6
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a0, a5, t5
+; RV32IMZBS-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s2, 13
+; RV32IMZBS-NEXT: seqz t5, s1
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and t6, t5, s0
+; RV32IMZBS-NEXT: slli s11, s2, 14
+; RV32IMZBS-NEXT: seqz t5, s3
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and a0, t5, a7
+; RV32IMZBS-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli ra, s2, 15
+; RV32IMZBS-NEXT: seqz s0, s5
+; RV32IMZBS-NEXT: addi s0, s0, -1
+; RV32IMZBS-NEXT: and s1, s0, s4
+; RV32IMZBS-NEXT: slli s4, s2, 16
+; RV32IMZBS-NEXT: seqz s0, s7
+; RV32IMZBS-NEXT: addi s0, s0, -1
+; RV32IMZBS-NEXT: and s3, s0, s6
+; RV32IMZBS-NEXT: slli a2, s2, 17
+; RV32IMZBS-NEXT: seqz s0, s9
+; RV32IMZBS-NEXT: addi s0, s0, -1
+; RV32IMZBS-NEXT: and t4, s0, s8
+; RV32IMZBS-NEXT: slli s0, s2, 18
+; RV32IMZBS-NEXT: seqz a6, a6
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and t5, a6, s10
+; RV32IMZBS-NEXT: bexti s5, t3, 11
+; RV32IMZBS-NEXT: addi s5, s5, -1
+; RV32IMZBS-NEXT: and s8, s5, a3
+; RV32IMZBS-NEXT: bexti a3, t3, 12
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and s9, a3, a4
+; RV32IMZBS-NEXT: bexti a3, t3, 13
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and s7, a3, a5
+; RV32IMZBS-NEXT: bexti a3, t3, 14
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and s6, a3, s11
+; RV32IMZBS-NEXT: bexti a3, t3, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and s5, a3, ra
+; RV32IMZBS-NEXT: bexti a3, t3, 16
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and t0, a3, s4
+; RV32IMZBS-NEXT: bexti a3, t3, 17
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a6, a3, a2
+; RV32IMZBS-NEXT: bexti a3, t3, 18
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and ra, a3, s0
+; RV32IMZBS-NEXT: bexti a3, t3, 19
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 19
+; RV32IMZBS-NEXT: and s10, a3, a5
+; RV32IMZBS-NEXT: bexti a3, t3, 20
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 20
+; RV32IMZBS-NEXT: and s11, a3, a5
+; RV32IMZBS-NEXT: bexti a3, t3, 21
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 21
+; RV32IMZBS-NEXT: and s4, a3, a5
+; RV32IMZBS-NEXT: bexti a3, t3, 22
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 22
+; RV32IMZBS-NEXT: and s0, a3, a5
+; RV32IMZBS-NEXT: bexti a3, t3, 23
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a5, s2, 23
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: bexti a4, t3, 24
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s2, 24
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: bexti a5, t3, 25
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a7, s2, 25
+; RV32IMZBS-NEXT: and a2, a5, a7
+; RV32IMZBS-NEXT: bexti a5, t3, 26
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a7, s2, 26
+; RV32IMZBS-NEXT: and a5, a5, a7
+; RV32IMZBS-NEXT: bexti a7, t3, 27
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: slli t1, s2, 27
+; RV32IMZBS-NEXT: and t1, a7, t1
+; RV32IMZBS-NEXT: bexti a7, t3, 28
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: slli a0, s2, 28
+; RV32IMZBS-NEXT: and a7, a7, a0
+; RV32IMZBS-NEXT: bexti a0, t3, 29
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli t2, s2, 29
+; RV32IMZBS-NEXT: and a0, a0, t2
+; RV32IMZBS-NEXT: andi a1, a1, 1
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a1, a1, s2
+; RV32IMZBS-NEXT: slli s2, s2, 30
+; RV32IMZBS-NEXT: bexti t2, t3, 30
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: and t2, t2, s2
+; RV32IMZBS-NEXT: lw t3, 20(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, t3
+; RV32IMZBS-NEXT: lw t3, 16(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 12(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, t3, s2
+; RV32IMZBS-NEXT: lw s2, 8(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, s2, t6
+; RV32IMZBS-NEXT: xor s1, s1, s3
+; RV32IMZBS-NEXT: xor s2, s8, s9
+; RV32IMZBS-NEXT: xor a6, t0, a6
+; RV32IMZBS-NEXT: xor a3, s0, a3
+; RV32IMZBS-NEXT: xor a0, a0, t2
+; RV32IMZBS-NEXT: xor a1, a1, t3
+; RV32IMZBS-NEXT: lw t0, 4(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t6, t0
+; RV32IMZBS-NEXT: xor t2, s1, t4
+; RV32IMZBS-NEXT: xor t3, s2, s7
+; RV32IMZBS-NEXT: xor a6, a6, ra
; RV32IMZBS-NEXT: xor a3, a3, a4
-; RV32IMZBS-NEXT: lw a4, 80(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 24(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a4
+; RV32IMZBS-NEXT: xor a1, a1, t0
+; RV32IMZBS-NEXT: xor a4, t2, t5
+; RV32IMZBS-NEXT: xor t0, t3, s6
+; RV32IMZBS-NEXT: xor a6, a6, s10
; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: xor a0, a0, a6
-; RV32IMZBS-NEXT: lw a6, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a3, a2, a6
-; RV32IMZBS-NEXT: srli a4, a2, 8
-; RV32IMZBS-NEXT: xor a0, a2, a0
-; RV32IMZBS-NEXT: slli a3, a3, 8
-; RV32IMZBS-NEXT: and a2, a4, a6
-; RV32IMZBS-NEXT: srli a0, a0, 24
-; RV32IMZBS-NEXT: or a1, a1, a3
-; RV32IMZBS-NEXT: or a0, a2, a0
-; RV32IMZBS-NEXT: or a0, a1, a0
+; RV32IMZBS-NEXT: xor a1, a1, a4
+; RV32IMZBS-NEXT: xor a3, t0, s5
+; RV32IMZBS-NEXT: xor a4, a6, s11
+; RV32IMZBS-NEXT: xor a2, a2, a5
+; RV32IMZBS-NEXT: xor a1, a1, a3
+; RV32IMZBS-NEXT: lui a3, 349525
+; RV32IMZBS-NEXT: addi a3, a3, 1364
+; RV32IMZBS-NEXT: xor a4, a4, s4
+; RV32IMZBS-NEXT: xor a2, a2, t1
+; RV32IMZBS-NEXT: xor a1, a1, a4
+; RV32IMZBS-NEXT: xor a2, a2, a7
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: srli a1, a0, 8
+; RV32IMZBS-NEXT: srli a2, a0, 24
+; RV32IMZBS-NEXT: slli a4, a0, 24
+; RV32IMZBS-NEXT: lw a5, 40(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a0, a0, a5
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: slli a0, a0, 8
+; RV32IMZBS-NEXT: or a1, a1, a2
+; RV32IMZBS-NEXT: or a0, a4, a0
+; RV32IMZBS-NEXT: or a0, a0, a1
; RV32IMZBS-NEXT: srli a1, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, s1
-; RV32IMZBS-NEXT: and a1, a1, s1
+; RV32IMZBS-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: and a1, a1, a2
; RV32IMZBS-NEXT: slli a0, a0, 4
; RV32IMZBS-NEXT: or a0, a1, a0
; RV32IMZBS-NEXT: srli a1, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, s3
-; RV32IMZBS-NEXT: and a1, a1, s3
+; RV32IMZBS-NEXT: lw a2, 32(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: and a1, a1, a2
; RV32IMZBS-NEXT: slli a0, a0, 2
; RV32IMZBS-NEXT: or a0, a1, a0
; RV32IMZBS-NEXT: srli a1, a0, 1
-; RV32IMZBS-NEXT: lw a2, 84(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 28(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: and a0, a0, a2
-; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: and a1, a1, a3
; RV32IMZBS-NEXT: slli a0, a0, 1
; RV32IMZBS-NEXT: or a0, a1, a0
; RV32IMZBS-NEXT: srli a0, a0, 1
-; RV32IMZBS-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: addi sp, sp, 144
+; RV32IMZBS-NEXT: lw ra, 92(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 88(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 84(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 80(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s3, 76(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s4, 72(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s5, 68(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s6, 64(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 60(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 56(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 52(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s10, 48(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: addi sp, sp, 96
; RV32IMZBS-NEXT: ret
;
; RV64IMZBS-LABEL: clmulh_i32:
; RV64IMZBS: # %bb.0:
-; RV64IMZBS-NEXT: addi sp, sp, -256
-; RV64IMZBS-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s1, 232(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s2, 224(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s3, 216(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s4, 208(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s5, 200(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s6, 192(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s7, 184(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s8, 176(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s9, 168(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s10, 160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s11, 152(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: slli a0, a0, 32
-; RV64IMZBS-NEXT: andi a7, a1, 2
-; RV64IMZBS-NEXT: andi a2, a1, 1
-; RV64IMZBS-NEXT: sd a2, 144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a2, a1, 4
-; RV64IMZBS-NEXT: sd a2, 136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a2, a1, 8
-; RV64IMZBS-NEXT: sd a2, 128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a2, a1, 16
-; RV64IMZBS-NEXT: sd a2, 120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a2, a1, 32
-; RV64IMZBS-NEXT: sd a2, 112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti t0, zero, 11
-; RV64IMZBS-NEXT: lui t1, 1
-; RV64IMZBS-NEXT: lui t2, 2
-; RV64IMZBS-NEXT: lui t3, 4
-; RV64IMZBS-NEXT: lui t4, 8
-; RV64IMZBS-NEXT: lui t5, 16
-; RV64IMZBS-NEXT: lui t6, 32
-; RV64IMZBS-NEXT: lui s0, 64
-; RV64IMZBS-NEXT: lui s1, 128
-; RV64IMZBS-NEXT: lui s2, 256
-; RV64IMZBS-NEXT: lui s3, 512
-; RV64IMZBS-NEXT: lui s4, 1024
-; RV64IMZBS-NEXT: lui s5, 2048
-; RV64IMZBS-NEXT: lui s6, 4096
-; RV64IMZBS-NEXT: lui s7, 8192
-; RV64IMZBS-NEXT: lui s8, 16384
-; RV64IMZBS-NEXT: lui s9, 32768
-; RV64IMZBS-NEXT: lui s10, 65536
-; RV64IMZBS-NEXT: lui s11, 131072
-; RV64IMZBS-NEXT: lui ra, 262144
-; RV64IMZBS-NEXT: and t0, a1, t0
-; RV64IMZBS-NEXT: and a2, a1, t1
-; RV64IMZBS-NEXT: sd a2, 104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and t2, a1, t2
-; RV64IMZBS-NEXT: and t3, a1, t3
-; RV64IMZBS-NEXT: and a2, a1, t4
-; RV64IMZBS-NEXT: sd a2, 96(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, t5
-; RV64IMZBS-NEXT: sd a2, 88(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and t6, a1, t6
-; RV64IMZBS-NEXT: and s0, a1, s0
-; RV64IMZBS-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s1, a1, s1
-; RV64IMZBS-NEXT: and a2, a1, s2
-; RV64IMZBS-NEXT: sd a2, 72(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s3, a1, s3
-; RV64IMZBS-NEXT: and a2, a1, s4
-; RV64IMZBS-NEXT: sd a2, 64(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, s5
-; RV64IMZBS-NEXT: sd a2, 56(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s6, a1, s6
-; RV64IMZBS-NEXT: and a2, a1, s7
-; RV64IMZBS-NEXT: sd a2, 48(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, s8
-; RV64IMZBS-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, s9
-; RV64IMZBS-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, s10
-; RV64IMZBS-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, s11
-; RV64IMZBS-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, ra
-; RV64IMZBS-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a1, 64
-; RV64IMZBS-NEXT: andi a2, a1, 128
-; RV64IMZBS-NEXT: andi a3, a1, 256
-; RV64IMZBS-NEXT: andi a4, a1, 512
-; RV64IMZBS-NEXT: andi a5, a1, 1024
-; RV64IMZBS-NEXT: srliw a1, a1, 31
-; RV64IMZBS-NEXT: srli a0, a0, 32
-; RV64IMZBS-NEXT: slli s9, a1, 31
-; RV64IMZBS-NEXT: mul s10, a0, a7
-; RV64IMZBS-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s8, a0, a1
-; RV64IMZBS-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s7, a0, a1
-; RV64IMZBS-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s4, a0, a1
-; RV64IMZBS-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s2, a0, a1
-; RV64IMZBS-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s0, a0, a1
-; RV64IMZBS-NEXT: mul a1, a0, a6
-; RV64IMZBS-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t5, a0, a2
-; RV64IMZBS-NEXT: mul t4, a0, a3
-; RV64IMZBS-NEXT: mul ra, a0, a4
-; RV64IMZBS-NEXT: mul a1, a0, a5
-; RV64IMZBS-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t1, a0, t0
-; RV64IMZBS-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t0, a0, a1
-; RV64IMZBS-NEXT: mul s5, a0, t2
-; RV64IMZBS-NEXT: mul a1, a0, t3
-; RV64IMZBS-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a6, a0, a1
-; RV64IMZBS-NEXT: mul a5, a0, t6
-; RV64IMZBS-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, a0, a1
-; RV64IMZBS-NEXT: mul s1, a0, s1
-; RV64IMZBS-NEXT: ld a1, 72(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s11, a0, a1
-; RV64IMZBS-NEXT: mul a1, a0, s3
-; RV64IMZBS-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 64(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a1
-; RV64IMZBS-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a0, a1
-; RV64IMZBS-NEXT: mul a7, a0, s6
-; RV64IMZBS-NEXT: ld a1, 48(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t2, a0, a1
-; RV64IMZBS-NEXT: ld a1, 40(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t6, a0, a1
-; RV64IMZBS-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s3, a0, a1
-; RV64IMZBS-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s6, a0, a1
-; RV64IMZBS-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a0, a1
-; RV64IMZBS-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: mul a0, a0, s9
-; RV64IMZBS-NEXT: xor s8, s8, s10
-; RV64IMZBS-NEXT: xor s4, s7, s4
-; RV64IMZBS-NEXT: xor s0, s2, s0
-; RV64IMZBS-NEXT: xor t4, t5, t4
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: xor a5, a6, a5
-; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: xor a1, a2, a1
-; RV64IMZBS-NEXT: xor a2, s8, s4
-; RV64IMZBS-NEXT: ld a4, 128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, s0, a4
-; RV64IMZBS-NEXT: xor a6, t4, ra
-; RV64IMZBS-NEXT: xor t0, t0, s5
-; RV64IMZBS-NEXT: xor a5, a5, t3
-; RV64IMZBS-NEXT: xor a3, a3, a7
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: ld a4, 144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a6, a4
-; RV64IMZBS-NEXT: ld a6, 112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, t0, a6
-; RV64IMZBS-NEXT: xor a5, a5, s1
-; RV64IMZBS-NEXT: xor a3, a3, t2
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: ld a4, 136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a6, a4
-; RV64IMZBS-NEXT: xor a5, a5, s11
-; RV64IMZBS-NEXT: xor a3, a3, t6
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: ld a4, 120(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, -176
+; RV64IMZBS-NEXT: sd ra, 168(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s0, 160(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 152(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 144(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s3, 136(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s4, 128(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s5, 120(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s6, 112(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s7, 104(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s8, 96(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s9, 88(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s10, 80(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s11, 72(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: mv s3, a1
+; RV64IMZBS-NEXT: slli a2, a0, 32
+; RV64IMZBS-NEXT: andi a3, a1, 2
+; RV64IMZBS-NEXT: andi a4, a1, 1
+; RV64IMZBS-NEXT: andi a5, a1, 4
+; RV64IMZBS-NEXT: andi a6, a1, 8
+; RV64IMZBS-NEXT: andi a7, a1, 16
+; RV64IMZBS-NEXT: andi t0, a1, 32
+; RV64IMZBS-NEXT: andi t1, a1, 64
+; RV64IMZBS-NEXT: andi t2, a1, 128
+; RV64IMZBS-NEXT: andi t3, a1, 256
+; RV64IMZBS-NEXT: andi t4, a1, 512
+; RV64IMZBS-NEXT: andi t5, a1, 1024
+; RV64IMZBS-NEXT: not s2, a1
+; RV64IMZBS-NEXT: srli t6, a2, 32
+; RV64IMZBS-NEXT: srli s0, a2, 31
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: seqz a4, a4
+; RV64IMZBS-NEXT: srli a0, a2, 30
+; RV64IMZBS-NEXT: seqz a5, a5
+; RV64IMZBS-NEXT: srli s1, a2, 29
+; RV64IMZBS-NEXT: seqz a6, a6
+; RV64IMZBS-NEXT: srli a1, a2, 28
+; RV64IMZBS-NEXT: seqz a7, a7
+; RV64IMZBS-NEXT: srli s4, a2, 27
+; RV64IMZBS-NEXT: seqz t0, t0
+; RV64IMZBS-NEXT: srli s5, a2, 26
+; RV64IMZBS-NEXT: seqz s6, t1
+; RV64IMZBS-NEXT: srli s7, a2, 25
+; RV64IMZBS-NEXT: seqz t2, t2
+; RV64IMZBS-NEXT: srli s8, a2, 24
+; RV64IMZBS-NEXT: seqz t3, t3
+; RV64IMZBS-NEXT: srli s9, a2, 23
+; RV64IMZBS-NEXT: seqz t4, t4
+; RV64IMZBS-NEXT: srli s10, a2, 22
+; RV64IMZBS-NEXT: seqz s11, t5
+; RV64IMZBS-NEXT: srli ra, a2, 21
+; RV64IMZBS-NEXT: bexti t1, s2, 11
+; RV64IMZBS-NEXT: srli t5, a2, 20
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, s0
+; RV64IMZBS-NEXT: sd a3, 64(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti s0, s2, 12
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: and a3, a4, t6
+; RV64IMZBS-NEXT: sd a3, 56(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: srli a3, a2, 19
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a0, a5, a0
+; RV64IMZBS-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, s2, 13
+; RV64IMZBS-NEXT: addi a6, a6, -1
+; RV64IMZBS-NEXT: and a4, a6, s1
+; RV64IMZBS-NEXT: sd a4, 40(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: srli a4, a2, 18
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: and a1, a7, a1
+; RV64IMZBS-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti t6, s2, 14
+; RV64IMZBS-NEXT: addi t0, t0, -1
+; RV64IMZBS-NEXT: and a1, t0, s4
+; RV64IMZBS-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: srli s4, a2, 17
+; RV64IMZBS-NEXT: addi s6, s6, -1
+; RV64IMZBS-NEXT: and a1, s6, s5
+; RV64IMZBS-NEXT: sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti s5, s2, 15
+; RV64IMZBS-NEXT: addi t2, t2, -1
+; RV64IMZBS-NEXT: and a1, t2, s7
+; RV64IMZBS-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: srli s6, a2, 16
+; RV64IMZBS-NEXT: addi t3, t3, -1
+; RV64IMZBS-NEXT: and a7, t3, s8
+; RV64IMZBS-NEXT: bexti s7, s2, 16
+; RV64IMZBS-NEXT: addi t4, t4, -1
+; RV64IMZBS-NEXT: and t3, t4, s9
+; RV64IMZBS-NEXT: srli s8, a2, 15
+; RV64IMZBS-NEXT: addi s11, s11, -1
+; RV64IMZBS-NEXT: and t4, s11, s10
+; RV64IMZBS-NEXT: bexti s9, s2, 17
+; RV64IMZBS-NEXT: addi t1, t1, -1
+; RV64IMZBS-NEXT: and t1, t1, ra
+; RV64IMZBS-NEXT: srli s10, a2, 14
+; RV64IMZBS-NEXT: addi s0, s0, -1
+; RV64IMZBS-NEXT: and a5, s0, t5
+; RV64IMZBS-NEXT: bexti t5, s2, 18
+; RV64IMZBS-NEXT: addi s0, a0, -1
+; RV64IMZBS-NEXT: and s0, s0, a3
+; RV64IMZBS-NEXT: srli a3, a2, 13
+; RV64IMZBS-NEXT: addi t6, t6, -1
+; RV64IMZBS-NEXT: and a4, t6, a4
+; RV64IMZBS-NEXT: bexti t6, s2, 19
+; RV64IMZBS-NEXT: addi s5, s5, -1
+; RV64IMZBS-NEXT: and a6, s5, s4
+; RV64IMZBS-NEXT: srli s11, a2, 12
+; RV64IMZBS-NEXT: addi s7, s7, -1
+; RV64IMZBS-NEXT: and s4, s7, s6
+; RV64IMZBS-NEXT: bexti ra, s2, 20
+; RV64IMZBS-NEXT: addi s9, s9, -1
+; RV64IMZBS-NEXT: and s7, s9, s8
+; RV64IMZBS-NEXT: srli s9, a2, 11
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: and s6, t5, s10
+; RV64IMZBS-NEXT: bexti t5, s2, 21
+; RV64IMZBS-NEXT: addi t6, t6, -1
+; RV64IMZBS-NEXT: and s5, t6, a3
+; RV64IMZBS-NEXT: srli a3, a2, 10
+; RV64IMZBS-NEXT: addi ra, ra, -1
+; RV64IMZBS-NEXT: and s8, ra, s11
+; RV64IMZBS-NEXT: bexti t6, s2, 22
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: and s9, t5, s9
+; RV64IMZBS-NEXT: srli t5, a2, 9
+; RV64IMZBS-NEXT: addi t6, t6, -1
+; RV64IMZBS-NEXT: and s10, t6, a3
+; RV64IMZBS-NEXT: bexti a3, s2, 23
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, t5
+; RV64IMZBS-NEXT: bexti t5, s2, 24
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: srli t6, a2, 8
+; RV64IMZBS-NEXT: and s11, t5, t6
+; RV64IMZBS-NEXT: bexti t5, s2, 25
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: srli t6, a2, 7
+; RV64IMZBS-NEXT: and t5, t5, t6
+; RV64IMZBS-NEXT: bexti t6, s2, 26
+; RV64IMZBS-NEXT: addi t6, t6, -1
+; RV64IMZBS-NEXT: srli ra, a2, 6
+; RV64IMZBS-NEXT: and t6, t6, ra
+; RV64IMZBS-NEXT: bexti ra, s2, 27
+; RV64IMZBS-NEXT: addi ra, ra, -1
+; RV64IMZBS-NEXT: srli a0, a2, 5
+; RV64IMZBS-NEXT: and a0, ra, a0
+; RV64IMZBS-NEXT: bexti ra, s2, 28
+; RV64IMZBS-NEXT: addi ra, ra, -1
+; RV64IMZBS-NEXT: srli s1, a2, 4
+; RV64IMZBS-NEXT: and s1, ra, s1
+; RV64IMZBS-NEXT: bexti ra, s2, 29
+; RV64IMZBS-NEXT: addi ra, ra, -1
+; RV64IMZBS-NEXT: srli a1, a2, 3
+; RV64IMZBS-NEXT: and a1, ra, a1
+; RV64IMZBS-NEXT: bexti s2, s2, 30
+; RV64IMZBS-NEXT: addi s2, s2, -1
+; RV64IMZBS-NEXT: srli ra, a2, 2
+; RV64IMZBS-NEXT: and s2, s2, ra
+; RV64IMZBS-NEXT: sraiw s3, s3, 31
+; RV64IMZBS-NEXT: srli a2, a2, 1
+; RV64IMZBS-NEXT: seqz s3, s3
+; RV64IMZBS-NEXT: addi s3, s3, -1
+; RV64IMZBS-NEXT: and a2, s3, a2
+; RV64IMZBS-NEXT: ld s3, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s3, ra, s3
+; RV64IMZBS-NEXT: ld ra, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t0, 40(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor ra, ra, t0
+; RV64IMZBS-NEXT: ld t0, 32(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t2, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, t2
+; RV64IMZBS-NEXT: ld t2, 8(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t2, a7
+; RV64IMZBS-NEXT: xor a5, t1, a5
+; RV64IMZBS-NEXT: xor t1, s4, s7
+; RV64IMZBS-NEXT: xor a3, s10, a3
+; RV64IMZBS-NEXT: xor a1, a1, s2
+; RV64IMZBS-NEXT: xor t2, s3, ra
+; RV64IMZBS-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, s2
+; RV64IMZBS-NEXT: xor a7, a7, t3
+; RV64IMZBS-NEXT: xor a5, a5, s0
+; RV64IMZBS-NEXT: xor t1, t1, s6
+; RV64IMZBS-NEXT: xor a3, a3, s11
+; RV64IMZBS-NEXT: xor t0, t2, t0
+; RV64IMZBS-NEXT: xor a7, a7, t4
; RV64IMZBS-NEXT: xor a4, a5, a4
-; RV64IMZBS-NEXT: xor a3, a3, s3
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: xor a3, a3, s6
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: xor a0, a1, a0
-; RV64IMZBS-NEXT: xor a0, a2, a0
+; RV64IMZBS-NEXT: xor a5, t1, s5
+; RV64IMZBS-NEXT: xor a3, a3, t5
+; RV64IMZBS-NEXT: xor a7, t0, a7
+; RV64IMZBS-NEXT: xor a4, a4, a6
+; RV64IMZBS-NEXT: xor a5, a5, s8
+; RV64IMZBS-NEXT: xor a3, a3, t6
+; RV64IMZBS-NEXT: xor a4, a7, a4
+; RV64IMZBS-NEXT: xor a5, a5, s9
+; RV64IMZBS-NEXT: xor a0, a3, a0
+; RV64IMZBS-NEXT: xor a4, a4, a5
+; RV64IMZBS-NEXT: xor a0, a0, s1
+; RV64IMZBS-NEXT: xor a0, a4, a0
+; RV64IMZBS-NEXT: xor a1, a1, a2
+; RV64IMZBS-NEXT: xor a0, a0, a1
; RV64IMZBS-NEXT: srli a0, a0, 32
-; RV64IMZBS-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 232(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s2, 224(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s3, 216(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s4, 208(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s5, 200(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s6, 192(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s7, 184(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s8, 176(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s9, 168(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s10, 160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 152(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: addi sp, sp, 256
+; RV64IMZBS-NEXT: ld ra, 168(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 160(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 152(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 144(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 136(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s4, 128(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s5, 120(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s6, 112(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s7, 104(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s8, 96(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s9, 88(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s10, 80(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s11, 72(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 176
; RV64IMZBS-NEXT: ret
%a.ext = zext i32 %a to i64
%b.ext = zext i32 %b to i64
@@ -3677,554 +2356,59 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind {
declare i8 @use(i8, i1)
define void @commutative_clmulh_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) {
-; RV32I-LABEL: commutative_clmulh_i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: zext.b s2, a1
-; RV32I-NEXT: andi a1, a0, 2
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, a0, s4
-; RV32I-NEXT: andi a1, s3, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s3, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s3, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s4, s5, a0
-; RV32I-NEXT: andi a1, s3, 128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 0(s1)
-; RV32I-NEXT: sb a0, 0(s0)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: commutative_clmulh_i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -64
-; RV64I-NEXT: .cfi_def_cfa_offset 64
-; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: .cfi_offset s4, -48
-; RV64I-NEXT: .cfi_offset s5, -56
-; RV64I-NEXT: mv s0, a3
-; RV64I-NEXT: mv s1, a2
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: zext.b s2, a1
-; RV64I-NEXT: andi a1, a0, 2
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, a0, s4
-; RV64I-NEXT: andi a1, s3, 4
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: andi a1, s3, 16
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 32
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: andi a1, s3, 64
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s4, s5, a0
-; RV64I-NEXT: andi a1, s3, 128
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 0(s1)
-; RV64I-NEXT: sb a0, 0(s0)
-; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: .cfi_restore s4
-; RV64I-NEXT: .cfi_restore s5
-; RV64I-NEXT: addi sp, sp, 64
-; RV64I-NEXT: .cfi_def_cfa_offset 0
-; RV64I-NEXT: ret
-;
-; CHECK-M-LABEL: commutative_clmulh_i8:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: zext.b a1, a1
-; CHECK-M-NEXT: andi a4, a0, 2
-; CHECK-M-NEXT: andi a5, a0, 1
-; CHECK-M-NEXT: andi a6, a0, 4
-; CHECK-M-NEXT: andi a7, a0, 8
-; CHECK-M-NEXT: andi t0, a0, 16
-; CHECK-M-NEXT: andi t1, a0, 32
-; CHECK-M-NEXT: andi t2, a0, 64
-; CHECK-M-NEXT: andi a0, a0, 128
-; CHECK-M-NEXT: mul a4, a1, a4
-; CHECK-M-NEXT: mul a5, a1, a5
-; CHECK-M-NEXT: mul a6, a1, a6
-; CHECK-M-NEXT: mul a7, a1, a7
-; CHECK-M-NEXT: mul t0, a1, t0
-; CHECK-M-NEXT: mul t1, a1, t1
-; CHECK-M-NEXT: mul t2, a1, t2
-; CHECK-M-NEXT: mul a0, a1, a0
-; CHECK-M-NEXT: xor a4, a5, a4
-; CHECK-M-NEXT: xor a1, a6, a7
-; CHECK-M-NEXT: xor a5, t0, t1
-; CHECK-M-NEXT: xor a1, a4, a1
-; CHECK-M-NEXT: xor a4, a5, t2
-; CHECK-M-NEXT: xor a1, a1, a4
-; CHECK-M-NEXT: xor a0, a1, a0
-; CHECK-M-NEXT: srli a0, a0, 8
-; CHECK-M-NEXT: sb a0, 0(a2)
-; CHECK-M-NEXT: sb a0, 0(a3)
-; CHECK-M-NEXT: ret
-;
-; CHECK-ZBS-LABEL: commutative_clmulh_i8:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: zext.b a1, a1
-; CHECK-ZBS-NEXT: andi a4, a0, 2
-; CHECK-ZBS-NEXT: andi a5, a0, 1
-; CHECK-ZBS-NEXT: andi a6, a0, 4
-; CHECK-ZBS-NEXT: andi a7, a0, 8
-; CHECK-ZBS-NEXT: andi t0, a0, 16
-; CHECK-ZBS-NEXT: andi t1, a0, 32
-; CHECK-ZBS-NEXT: andi t2, a0, 64
-; CHECK-ZBS-NEXT: andi a0, a0, 128
-; CHECK-ZBS-NEXT: mul a4, a1, a4
-; CHECK-ZBS-NEXT: mul a5, a1, a5
-; CHECK-ZBS-NEXT: mul a6, a1, a6
-; CHECK-ZBS-NEXT: mul a7, a1, a7
-; CHECK-ZBS-NEXT: mul t0, a1, t0
-; CHECK-ZBS-NEXT: mul t1, a1, t1
-; CHECK-ZBS-NEXT: mul t2, a1, t2
-; CHECK-ZBS-NEXT: mul a0, a1, a0
-; CHECK-ZBS-NEXT: xor a4, a5, a4
-; CHECK-ZBS-NEXT: xor a1, a6, a7
-; CHECK-ZBS-NEXT: xor a5, t0, t1
-; CHECK-ZBS-NEXT: xor a1, a4, a1
-; CHECK-ZBS-NEXT: xor a4, a5, t2
-; CHECK-ZBS-NEXT: xor a1, a1, a4
-; CHECK-ZBS-NEXT: xor a0, a1, a0
-; CHECK-ZBS-NEXT: srli a0, a0, 8
-; CHECK-ZBS-NEXT: sb a0, 0(a2)
-; CHECK-ZBS-NEXT: sb a0, 0(a3)
-; CHECK-ZBS-NEXT: ret
+; CHECK-LABEL: commutative_clmulh_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: zext.b a1, a1
+; CHECK-NEXT: andi a4, a0, 2
+; CHECK-NEXT: andi a5, a0, 4
+; CHECK-NEXT: andi a6, a0, 8
+; CHECK-NEXT: andi a7, a0, 16
+; CHECK-NEXT: andi t0, a0, 32
+; CHECK-NEXT: andi t1, a0, 64
+; CHECK-NEXT: slli t2, a1, 1
+; CHECK-NEXT: seqz a4, a4
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a4, a4, t2
+; CHECK-NEXT: slli t2, a1, 2
+; CHECK-NEXT: seqz a5, a5
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a5, a5, t2
+; CHECK-NEXT: slli t2, a1, 3
+; CHECK-NEXT: seqz a6, a6
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a6, a6, t2
+; CHECK-NEXT: slli t2, a1, 4
+; CHECK-NEXT: seqz a7, a7
+; CHECK-NEXT: addi a7, a7, -1
+; CHECK-NEXT: and a7, a7, t2
+; CHECK-NEXT: slli t2, a1, 5
+; CHECK-NEXT: seqz t0, t0
+; CHECK-NEXT: addi t0, t0, -1
+; CHECK-NEXT: and t0, t0, t2
+; CHECK-NEXT: slli t2, a1, 6
+; CHECK-NEXT: seqz t1, t1
+; CHECK-NEXT: addi t1, t1, -1
+; CHECK-NEXT: and t1, t1, t2
+; CHECK-NEXT: andi t2, a0, 1
+; CHECK-NEXT: andi a0, a0, 128
+; CHECK-NEXT: seqz t2, t2
+; CHECK-NEXT: addi t2, t2, -1
+; CHECK-NEXT: and t2, t2, a1
+; CHECK-NEXT: slli a1, a1, 7
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: xor a1, t2, a4
+; CHECK-NEXT: xor a4, a5, a6
+; CHECK-NEXT: xor a5, a7, t0
+; CHECK-NEXT: xor a1, a1, a4
+; CHECK-NEXT: xor a4, a5, t1
+; CHECK-NEXT: xor a1, a1, a4
+; CHECK-NEXT: xor a0, a1, a0
+; CHECK-NEXT: srli a0, a0, 8
+; CHECK-NEXT: sb a0, 0(a2)
+; CHECK-NEXT: sb a0, 0(a3)
+; CHECK-NEXT: ret
%x.ext = zext i8 %x to i16
%y.ext = zext i8 %y to i16
%clmul_xy = call i16 @llvm.clmul.i16(i16 %x.ext, i16 %y.ext)
@@ -4241,6119 +2425,6939 @@ define void @commutative_clmulh_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) {
define void @commutative_clmulh_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %p1) nounwind {
; RV32I-LABEL: commutative_clmulh_v2i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -224
-; RV32I-NEXT: sw ra, 220(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 216(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 212(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 208(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 204(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 200(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 196(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 192(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 188(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 184(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s9, 180(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s10, 176(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s11, 172(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a3, 164(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a2, 160(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s7, 0(a0)
-; RV32I-NEXT: lw a2, 4(a0)
-; RV32I-NEXT: lw a3, 8(a0)
-; RV32I-NEXT: sw a3, 156(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s9, 0(a1)
-; RV32I-NEXT: lw a0, 4(a1)
-; RV32I-NEXT: lw a3, 8(a1)
-; RV32I-NEXT: sw a3, 152(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lui a4, 16
+; RV32I-NEXT: addi sp, sp, -816
+; RV32I-NEXT: sw ra, 812(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 808(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 804(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 800(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 796(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 792(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 788(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 784(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 780(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s8, 776(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s9, 772(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s10, 768(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s11, 764(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a3, 752(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a2, 748(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 0(a0)
+; RV32I-NEXT: lw t4, 4(a0)
+; RV32I-NEXT: lw a7, 8(a0)
+; RV32I-NEXT: lw t2, 12(a0)
+; RV32I-NEXT: lw s5, 0(a1)
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw t1, 8(a1)
+; RV32I-NEXT: lw a0, 12(a1)
+; RV32I-NEXT: lui a5, 16
; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: lui a3, 209715
-; RV32I-NEXT: lui a5, 349525
-; RV32I-NEXT: addi s6, a4, -256
-; RV32I-NEXT: addi s5, a1, -241
-; RV32I-NEXT: addi s4, a3, 819
-; RV32I-NEXT: addi s3, a5, 1365
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: and a4, a0, s6
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srli a5, a2, 8
-; RV32I-NEXT: srli a6, a2, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: and a3, a2, s6
-; RV32I-NEXT: slli a2, a2, 24
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: lui a4, 349525
+; RV32I-NEXT: addi s11, a5, -256
+; RV32I-NEXT: addi s10, a1, -241
+; RV32I-NEXT: addi s9, a2, 819
+; RV32I-NEXT: addi s8, a4, 1365
+; RV32I-NEXT: srli a1, a3, 8
+; RV32I-NEXT: srli t5, a3, 24
+; RV32I-NEXT: and a2, a3, s11
+; RV32I-NEXT: slli a3, a3, 24
+; RV32I-NEXT: srli a5, t4, 8
+; RV32I-NEXT: srli a6, t4, 24
+; RV32I-NEXT: and a4, t4, s11
+; RV32I-NEXT: slli t4, t4, 24
+; RV32I-NEXT: srli s2, s5, 8
+; RV32I-NEXT: srli t3, s5, 24
+; RV32I-NEXT: and s3, s5, s11
+; RV32I-NEXT: slli s5, s5, 24
+; RV32I-NEXT: srli t6, t0, 8
+; RV32I-NEXT: srli s1, t0, 24
+; RV32I-NEXT: and s0, t0, s11
+; RV32I-NEXT: slli t0, t0, 24
+; RV32I-NEXT: and a1, a1, s11
+; RV32I-NEXT: or a1, a1, t5
+; RV32I-NEXT: srli t5, a0, 8
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: or a2, a3, a2
+; RV32I-NEXT: srli s6, a0, 24
+; RV32I-NEXT: and a3, a5, s11
+; RV32I-NEXT: or a3, a3, a6
+; RV32I-NEXT: and s4, a0, s11
+; RV32I-NEXT: slli s7, a0, 24
; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: and a5, a5, s6
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a0, a0, a4
-; RV32I-NEXT: or a4, a5, a6
-; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: or a2, a2, a4
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: srli a3, a2, 4
-; RV32I-NEXT: and a2, a2, s5
-; RV32I-NEXT: and a1, a1, s5
+; RV32I-NEXT: or a0, t4, a4
+; RV32I-NEXT: srli a5, t2, 8
+; RV32I-NEXT: and a4, s2, s11
+; RV32I-NEXT: or a4, a4, t3
+; RV32I-NEXT: srli a6, t2, 24
+; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or t4, s5, s3
+; RV32I-NEXT: and s2, t2, s11
+; RV32I-NEXT: slli s3, t2, 24
+; RV32I-NEXT: and t2, t6, s11
+; RV32I-NEXT: or t2, t2, s1
+; RV32I-NEXT: srli t3, t1, 8
+; RV32I-NEXT: slli s0, s0, 8
+; RV32I-NEXT: or s0, t0, s0
+; RV32I-NEXT: srli s5, t1, 24
+; RV32I-NEXT: and t0, t5, s11
+; RV32I-NEXT: or t0, t0, s6
+; RV32I-NEXT: and t6, t1, s11
+; RV32I-NEXT: slli t5, t1, 24
+; RV32I-NEXT: slli s4, s4, 8
+; RV32I-NEXT: or t1, s7, s4
+; RV32I-NEXT: srli s1, a7, 8
+; RV32I-NEXT: and a5, a5, s11
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: srli a6, a7, 24
+; RV32I-NEXT: slli s2, s2, 8
+; RV32I-NEXT: or s2, s3, s2
+; RV32I-NEXT: and s3, a7, s11
+; RV32I-NEXT: slli a7, a7, 24
+; RV32I-NEXT: and t3, t3, s11
+; RV32I-NEXT: slli t6, t6, 8
+; RV32I-NEXT: and s1, s1, s11
+; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or t3, t3, s5
+; RV32I-NEXT: or t5, t5, t6
+; RV32I-NEXT: or a6, s1, a6
+; RV32I-NEXT: or a7, a7, s3
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a2, t4, a4
+; RV32I-NEXT: or a3, s0, t2
+; RV32I-NEXT: or a4, t1, t0
+; RV32I-NEXT: or a5, s2, a5
+; RV32I-NEXT: or t0, t5, t3
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: srli a7, a1, 4
+; RV32I-NEXT: sw s10, 760(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, s10
+; RV32I-NEXT: srli t1, a0, 4
+; RV32I-NEXT: and a0, a0, s10
+; RV32I-NEXT: srli t2, a2, 4
+; RV32I-NEXT: and a2, a2, s10
+; RV32I-NEXT: srli t3, a3, 4
+; RV32I-NEXT: and a3, a3, s10
+; RV32I-NEXT: srli t4, a4, 4
+; RV32I-NEXT: and a4, a4, s10
+; RV32I-NEXT: srli t5, a5, 4
+; RV32I-NEXT: and a5, a5, s10
+; RV32I-NEXT: srli t6, t0, 4
+; RV32I-NEXT: and t0, t0, s10
+; RV32I-NEXT: srli s0, a6, 4
+; RV32I-NEXT: and a6, a6, s10
+; RV32I-NEXT: and a7, a7, s10
+; RV32I-NEXT: slli a1, a1, 4
+; RV32I-NEXT: and t1, t1, s10
; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: and a3, a3, s5
+; RV32I-NEXT: and t2, t2, s10
; RV32I-NEXT: slli a2, a2, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: srli a3, a2, 2
-; RV32I-NEXT: and a2, a2, s4
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and t3, t3, s10
+; RV32I-NEXT: slli a3, a3, 4
+; RV32I-NEXT: and t4, t4, s10
+; RV32I-NEXT: slli a4, a4, 4
+; RV32I-NEXT: and t5, t5, s10
+; RV32I-NEXT: slli a5, a5, 4
+; RV32I-NEXT: and t6, t6, s10
+; RV32I-NEXT: slli t0, t0, 4
+; RV32I-NEXT: and s0, s0, s10
+; RV32I-NEXT: slli a6, a6, 4
+; RV32I-NEXT: or a1, a7, a1
+; RV32I-NEXT: or a0, t1, a0
+; RV32I-NEXT: or a2, t2, a2
+; RV32I-NEXT: or a3, t3, a3
+; RV32I-NEXT: or a4, t4, a4
+; RV32I-NEXT: or a5, t5, a5
+; RV32I-NEXT: or a7, t6, t0
+; RV32I-NEXT: or a6, s0, a6
+; RV32I-NEXT: srli t0, a1, 2
+; RV32I-NEXT: sw s9, 740(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, s9
+; RV32I-NEXT: srli t1, a0, 2
+; RV32I-NEXT: and a0, a0, s9
+; RV32I-NEXT: srli t2, a2, 2
+; RV32I-NEXT: and a2, a2, s9
+; RV32I-NEXT: srli t3, a3, 2
+; RV32I-NEXT: and a3, a3, s9
+; RV32I-NEXT: srli t4, a4, 2
+; RV32I-NEXT: and a4, a4, s9
+; RV32I-NEXT: srli t5, a5, 2
+; RV32I-NEXT: and a5, a5, s9
+; RV32I-NEXT: srli t6, a7, 2
+; RV32I-NEXT: and a7, a7, s9
+; RV32I-NEXT: srli s0, a6, 2
+; RV32I-NEXT: and a6, a6, s9
+; RV32I-NEXT: and t0, t0, s9
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: and t1, t1, s9
; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, s4
+; RV32I-NEXT: and t2, t2, s9
; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: srli a3, a2, 1
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, s3
+; RV32I-NEXT: and t3, t3, s9
+; RV32I-NEXT: slli a3, a3, 2
+; RV32I-NEXT: and t4, t4, s9
+; RV32I-NEXT: slli a4, a4, 2
+; RV32I-NEXT: and t5, t5, s9
+; RV32I-NEXT: slli a5, a5, 2
+; RV32I-NEXT: and t6, t6, s9
+; RV32I-NEXT: slli a7, a7, 2
+; RV32I-NEXT: and s0, s0, s9
+; RV32I-NEXT: slli a6, a6, 2
+; RV32I-NEXT: or a1, t0, a1
+; RV32I-NEXT: or a0, t1, a0
+; RV32I-NEXT: or a2, t2, a2
+; RV32I-NEXT: or a3, t3, a3
+; RV32I-NEXT: or a4, t4, a4
+; RV32I-NEXT: or a5, t5, a5
+; RV32I-NEXT: or a7, t6, a7
+; RV32I-NEXT: or a6, s0, a6
+; RV32I-NEXT: srli t0, a1, 1
+; RV32I-NEXT: sw s8, 744(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a1, a1, s8
+; RV32I-NEXT: srli t1, a0, 1
+; RV32I-NEXT: and a0, a0, s8
+; RV32I-NEXT: srli t2, a2, 1
+; RV32I-NEXT: and a2, a2, s8
+; RV32I-NEXT: srli t3, a3, 1
+; RV32I-NEXT: and a3, a3, s8
+; RV32I-NEXT: srli t4, a4, 1
+; RV32I-NEXT: and a4, a4, s8
+; RV32I-NEXT: srli t5, a5, 1
+; RV32I-NEXT: and a5, a5, s8
+; RV32I-NEXT: srli t6, a7, 1
+; RV32I-NEXT: and a7, a7, s8
+; RV32I-NEXT: srli s0, a6, 1
+; RV32I-NEXT: and a6, a6, s8
+; RV32I-NEXT: and t0, t0, s8
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: and t1, t1, s8
+; RV32I-NEXT: slli s1, a0, 1
+; RV32I-NEXT: and s2, t2, s8
; RV32I-NEXT: slli a2, a2, 1
-; RV32I-NEXT: or s2, a1, a0
-; RV32I-NEXT: or s8, a3, a2
-; RV32I-NEXT: srli a0, s2, 8
-; RV32I-NEXT: srli a1, s2, 24
-; RV32I-NEXT: slli a2, s2, 24
-; RV32I-NEXT: and a3, s2, s6
-; RV32I-NEXT: srli a4, s8, 8
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, s8, 24
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: slli a3, s8, 24
-; RV32I-NEXT: and a4, a4, s6
-; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: and a4, s8, s6
+; RV32I-NEXT: and t3, t3, s8
+; RV32I-NEXT: slli s3, a3, 1
+; RV32I-NEXT: and t4, t4, s8
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: and t5, t5, s8
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: and t6, t6, s8
+; RV32I-NEXT: slli s4, a7, 1
+; RV32I-NEXT: and s6, s0, s8
+; RV32I-NEXT: slli t2, a6, 1
+; RV32I-NEXT: or a0, t0, a1
+; RV32I-NEXT: or a1, t1, s1
+; RV32I-NEXT: or a2, s2, a2
+; RV32I-NEXT: srli a7, s1, 31
+; RV32I-NEXT: or a3, t3, s3
+; RV32I-NEXT: srli t1, s3, 31
+; RV32I-NEXT: or s0, t4, a4
+; RV32I-NEXT: or s5, t5, a5
+; RV32I-NEXT: or s2, t6, s4
+; RV32I-NEXT: srli a4, a5, 31
+; RV32I-NEXT: or s6, s6, t2
+; RV32I-NEXT: srli t3, t2, 31
+; RV32I-NEXT: srli a5, a0, 8
+; RV32I-NEXT: srli a6, a0, 24
+; RV32I-NEXT: srli t0, a1, 8
+; RV32I-NEXT: srli t2, a1, 24
+; RV32I-NEXT: slli t4, a1, 24
+; RV32I-NEXT: and t5, a1, s11
+; RV32I-NEXT: slli t6, a2, 31
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: slli s1, a0, 31
+; RV32I-NEXT: seqz t1, t1
+; RV32I-NEXT: and a5, a5, s11
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: srli s4, s0, 8
+; RV32I-NEXT: and a6, t0, s11
+; RV32I-NEXT: or a6, a6, t2
+; RV32I-NEXT: srli t2, s0, 24
+; RV32I-NEXT: slli t5, t5, 8
+; RV32I-NEXT: or t0, t4, t5
+; RV32I-NEXT: srli t4, s5, 8
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: addi t1, t1, -1
+; RV32I-NEXT: and t5, a7, t6
+; RV32I-NEXT: sw t5, 720(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and t1, t1, s1
+; RV32I-NEXT: sw t1, 728(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a7, a7, s1
+; RV32I-NEXT: sw a7, 736(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a7, s5, 24
+; RV32I-NEXT: and t1, s4, s11
+; RV32I-NEXT: or t2, t1, t2
+; RV32I-NEXT: slli t5, s5, 24
+; RV32I-NEXT: and t1, t4, s11
+; RV32I-NEXT: or t1, t1, a7
+; RV32I-NEXT: and a7, s5, s11
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a7, t5, a7
+; RV32I-NEXT: slli t4, s2, 31
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: seqz t3, t3
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi t5, t3, -1
+; RV32I-NEXT: and t3, a4, t4
+; RV32I-NEXT: sw t3, 724(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli t3, s0, 31
+; RV32I-NEXT: and t4, t5, t3
+; RV32I-NEXT: sw t4, 732(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, t3
+; RV32I-NEXT: sw a4, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a0, s11
+; RV32I-NEXT: sw s11, 756(sp) # 4-byte Folded Spill
; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: and a2, a2, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: and a3, a3, s5
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: and a2, a2, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, s4
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, s3
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: or s0, a2, a0
-; RV32I-NEXT: or s11, a3, a1
-; RV32I-NEXT: andi a1, s11, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s11, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s10, a0, s1
-; RV32I-NEXT: andi a1, s11, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s11, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: andi a1, s11, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s11, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s11, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: andi a1, s11, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s11, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s11, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s11, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a1, s11, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
+; RV32I-NEXT: slli t4, a0, 24
+; RV32I-NEXT: or a4, t4, a4
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: sw a4, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a4, t0, a6
+; RV32I-NEXT: sw a4, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 2
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 2
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 1
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 1
+; RV32I-NEXT: and s1, a5, a6
+; RV32I-NEXT: and t5, a4, a6
+; RV32I-NEXT: andi a4, a1, 4
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 4
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 2
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 2
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 8
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 8
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 3
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 3
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 668(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 16
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 16
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 4
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 4
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 32
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 32
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 5
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 5
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 64
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 64
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 6
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 6
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 128
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 128
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 7
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 580(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 7
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 256
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 256
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 8
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 8
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 564(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 512
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 512
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 9
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 9
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 1024
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 1024
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 10
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 10
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 700(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a4, 1
+; RV32I-NEXT: slli t0, a4, 11
+; RV32I-NEXT: and a4, a1, t0
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, t0
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 11
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 508(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 11
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 532(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 1
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 12
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 500(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 12
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 504(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 512(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 2
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 13
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 13
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 4
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 14
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 14
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 8
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 15
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 15
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 652(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 16
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 16
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 432(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 16
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 440(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 444(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 32
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 17
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 412(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 17
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 420(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 436(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 64
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 18
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 460(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 18
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 468(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 484(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 128
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 19
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 516(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 19
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 524(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 528(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 256
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 20
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 20
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 560(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 512
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 21
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 21
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 576(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 1024
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 22
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 380(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 22
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 392(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 396(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 2048
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 23
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 368(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 23
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 372(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 388(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 4096
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 24
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 400(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a5, a5, t4
+; RV32I-NEXT: sw a5, 404(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, t4
+; RV32I-NEXT: sw a4, 408(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 8192
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 25
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 416(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 25
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 424(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 428(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 16384
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 26
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 448(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 26
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 452(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 464(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 32768
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 27
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 472(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 27
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 476(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 480(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 65536
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 28
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 492(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 28
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 488(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 496(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 131072
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 29
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 356(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 29
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 360(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 364(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 262144
+; RV32I-NEXT: andi a4, a1, 1
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: lui t3, 262144
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 1
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 30
+; RV32I-NEXT: and a2, a4, a2
+; RV32I-NEXT: sw a2, 324(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a5, a5, a0
+; RV32I-NEXT: sw a5, 348(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a0
+; RV32I-NEXT: slli a0, a0, 30
+; RV32I-NEXT: and a2, a3, t3
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a3, a1, a6
+; RV32I-NEXT: sw a3, 316(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a2, a2, a0
+; RV32I-NEXT: sw a2, 344(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and t3, a1, a0
+; RV32I-NEXT: and a0, s0, s11
+; RV32I-NEXT: slli a0, a0, 8
+; RV32I-NEXT: slli t4, s0, 24
+; RV32I-NEXT: or a0, t4, a0
+; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a0, a7, t1
+; RV32I-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, s5, 2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 2
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 1
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 320(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 1
+; RV32I-NEXT: and t2, a1, a2
+; RV32I-NEXT: and a7, a0, a2
+; RV32I-NEXT: andi a0, s5, 4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 4
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 2
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 296(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 2
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 312(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, s5, 8
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 8
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 3
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 280(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 3
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 292(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, s5, 16
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 16
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 4
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 256(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 4
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 288(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, s5, 32
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 32
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 5
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 228(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 5
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 264(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, s5, 64
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 64
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 6
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 300(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 6
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, s5, 128
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 128
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 7
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 212(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 7
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 224(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, s5, 256
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 256
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 8
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 188(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 8
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 204(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 216(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, s5, 512
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 512
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 9
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 232(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 9
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, s5, 1024
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 1024
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 10
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 336(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 10
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 340(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t0, 456(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s5, t0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, t0
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 11
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 140(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 11
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 148(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 156(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 12
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 120(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 12
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 2
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 13
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 168(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 13
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 172(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 192(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 4
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 14
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 236(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 14
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 8
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 15
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 272(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 15
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 276(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 16
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 16
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 32
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 17
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 17
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 64
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 18
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 96(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 18
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 128
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 19
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 152(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 19
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 160(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 164(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 256
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 20
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 176(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 20
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 180(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 512
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 21
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 196(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, s0, 21
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a1, 1024
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: and a1, s11, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, s2, 22
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, s0, 22
+; RV32I-NEXT: and a2, a1, a5
+; RV32I-NEXT: and a0, a0, a5
+; RV32I-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 2048
+; RV32I-NEXT: and a0, s5, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a3, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a0, s2, 23
+; RV32I-NEXT: and a0, a3, a0
+; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, s0, 23
+; RV32I-NEXT: and a5, a1, a6
+; RV32I-NEXT: and a0, a3, a6
+; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and a6, s5, a0
+; RV32I-NEXT: seqz a6, a6
+; RV32I-NEXT: and t0, s6, a0
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t1, a6, -1
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: slli a6, s2, 24
+; RV32I-NEXT: and a0, t1, a6
+; RV32I-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t0, t4
+; RV32I-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t1, t4
+; RV32I-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and t4, s5, a0
+; RV32I-NEXT: seqz t4, t4
+; RV32I-NEXT: and t6, s6, a0
+; RV32I-NEXT: seqz t6, t6
+; RV32I-NEXT: addi s4, t4, -1
+; RV32I-NEXT: addi t6, t6, -1
+; RV32I-NEXT: slli t4, s2, 25
+; RV32I-NEXT: and a0, s4, t4
+; RV32I-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s7, s0, 25
+; RV32I-NEXT: and a0, t6, s7
+; RV32I-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s4, s7
+; RV32I-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and s7, s5, a0
+; RV32I-NEXT: seqz s7, s7
+; RV32I-NEXT: and s8, s6, a0
+; RV32I-NEXT: seqz s8, s8
+; RV32I-NEXT: addi s9, s7, -1
+; RV32I-NEXT: addi s8, s8, -1
+; RV32I-NEXT: slli s7, s2, 26
+; RV32I-NEXT: and a0, s9, s7
+; RV32I-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s10, s0, 26
+; RV32I-NEXT: and a0, s8, s10
+; RV32I-NEXT: sw a0, 88(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s9, s10
+; RV32I-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and s9, s5, a0
+; RV32I-NEXT: seqz s9, s9
+; RV32I-NEXT: and s10, s6, a0
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: addi s9, s9, -1
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: slli s11, s2, 27
+; RV32I-NEXT: and a0, s9, s11
+; RV32I-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s11, s0, 27
+; RV32I-NEXT: and a0, s10, s11
+; RV32I-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s9, s11
+; RV32I-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lui a1, 131072
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: and a1, s11, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: and s9, s5, a0
+; RV32I-NEXT: seqz s9, s9
+; RV32I-NEXT: and s10, s6, a0
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: addi s9, s9, -1
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: slli s11, s2, 28
+; RV32I-NEXT: and a0, s9, s11
+; RV32I-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s11, s0, 28
+; RV32I-NEXT: and a0, s10, s11
+; RV32I-NEXT: sw a0, 124(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s9, s11
+; RV32I-NEXT: sw a0, 136(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a0, 131072
+; RV32I-NEXT: and s9, s5, a0
+; RV32I-NEXT: seqz s9, s9
+; RV32I-NEXT: and s10, s6, a0
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: addi s9, s9, -1
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: slli s11, s2, 29
+; RV32I-NEXT: and a6, s9, s11
+; RV32I-NEXT: slli ra, s0, 29
+; RV32I-NEXT: and a3, s10, ra
+; RV32I-NEXT: and a0, s9, ra
+; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi ra, s5, 1
; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s11, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lui a4, 349525
-; RV32I-NEXT: addi a4, a4, 1364
-; RV32I-NEXT: sw a4, 168(sp) # 4-byte Folded Spill
-; RV32I-NEXT: srli a1, s9, 8
-; RV32I-NEXT: xor a0, s10, a0
-; RV32I-NEXT: srli a2, s9, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: and a2, s9, s6
-; RV32I-NEXT: slli s9, s9, 24
-; RV32I-NEXT: slli a2, a2, 8
-; RV32I-NEXT: or a2, s9, a2
-; RV32I-NEXT: srli a3, a0, 8
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: and a3, a3, s6
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: slli a3, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: and a2, a3, s5
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: srli a3, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: and a2, a3, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: and a2, a3, s4
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: srli a3, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: and a2, a3, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: and a2, a3, s3
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srli a3, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: or s1, a2, a1
-; RV32I-NEXT: and a1, a3, a4
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: andi a1, s8, 2
-; RV32I-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s8, 1
-; RV32I-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, a0, s0
-; RV32I-NEXT: andi a1, s8, 4
-; RV32I-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s8, 8
-; RV32I-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: andi a1, s8, 16
-; RV32I-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s8, 32
-; RV32I-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s8, 64
-; RV32I-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: andi a1, s8, 128
-; RV32I-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s8, 256
-; RV32I-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s8, 512
-; RV32I-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s8, 1024
-; RV32I-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, s8, s11
-; RV32I-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s10, s9, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s9, s10, a0
-; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: sw a1, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and s10, s8, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s10
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: srli a1, s7, 8
-; RV32I-NEXT: xor s8, s9, a0
-; RV32I-NEXT: srli a0, s7, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: and a1, s7, s6
-; RV32I-NEXT: slli s7, s7, 24
-; RV32I-NEXT: slli a1, a1, 8
-; RV32I-NEXT: or a1, s7, a1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or s7, a1, a0
-; RV32I-NEXT: andi a1, s7, 2
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, a0, s1
-; RV32I-NEXT: andi a1, s7, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 256
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 512
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 1024
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: and a1, s7, s11
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor a0, a0, s8
-; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and s5, s5, a0
+; RV32I-NEXT: seqz ra, ra
+; RV32I-NEXT: andi s8, s6, 1
+; RV32I-NEXT: seqz s8, s8
+; RV32I-NEXT: addi ra, ra, -1
+; RV32I-NEXT: addi s8, s8, -1
+; RV32I-NEXT: slli s3, s2, 30
+; RV32I-NEXT: and s2, ra, s2
+; RV32I-NEXT: and s8, s8, s0
+; RV32I-NEXT: and a1, ra, s0
+; RV32I-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s0, s0, 30
+; RV32I-NEXT: and s6, s6, a0
+; RV32I-NEXT: seqz s5, s5
+; RV32I-NEXT: seqz s6, s6
+; RV32I-NEXT: addi s5, s5, -1
+; RV32I-NEXT: addi s6, s6, -1
+; RV32I-NEXT: and s3, s5, s3
+; RV32I-NEXT: and s6, s6, s0
+; RV32I-NEXT: and a0, s5, s0
+; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 324(sp) # 4-byte Folded Reload
; RV32I-NEXT: xor a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: slli a3, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: lw a2, 168(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli s8, a0, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 140(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, a0, s1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 132(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 380(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 316(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, s1
+; RV32I-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 504(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 420(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 360(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 344(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, a4, t5
+; RV32I-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, a0, a1
+; RV32I-NEXT: lw a0, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, a0, a1
+; RV32I-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, a0, a1
+; RV32I-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 512(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 396(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 364(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, t3
+; RV32I-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 320(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, s2, a0
+; RV32I-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 280(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, a0, a1
+; RV32I-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, a0, a1
+; RV32I-NEXT: lw a0, 212(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 188(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, a0, a1
+; RV32I-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, a0, s0
+; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, a0, s1
+; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, a0, a1
+; RV32I-NEXT: xor s3, a6, s3
+; RV32I-NEXT: xor s8, s8, t2
+; RV32I-NEXT: lw a0, 312(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 292(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a0, a1
+; RV32I-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a0, a1
+; RV32I-NEXT: lw a0, 220(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a0, a1
+; RV32I-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: xor t3, a0, a1
+; RV32I-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s7, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s10
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
+; RV32I-NEXT: xor t4, a0, a1
+; RV32I-NEXT: xor t5, a2, a5
+; RV32I-NEXT: xor t6, a3, s6
+; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a0, a7
+; RV32I-NEXT: lw a0, 332(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 288(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 264(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw a2, 224(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 216(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: lw a3, 156(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a4, 144(sp) # 4-byte Folded Reload
-; RV32I-NEXT: srli a2, a4, 8
-; RV32I-NEXT: srli a1, a4, 24
-; RV32I-NEXT: and a3, a4, s6
-; RV32I-NEXT: slli a4, a4, 24
-; RV32I-NEXT: lw t0, 148(sp) # 4-byte Folded Reload
-; RV32I-NEXT: srli a5, t0, 8
-; RV32I-NEXT: srli a6, t0, 24
-; RV32I-NEXT: xor a0, s7, a0
-; RV32I-NEXT: and a7, t0, s6
-; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: and a2, a2, s6
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: and a5, a5, s6
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: srli a2, a0, 8
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: srli a4, a0, 24
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a6, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: and a2, a2, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: or a3, a7, a5
-; RV32I-NEXT: or a2, a2, a4
-; RV32I-NEXT: or a0, a6, a0
-; RV32I-NEXT: srli a4, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: srli a5, a3, 4
-; RV32I-NEXT: and a3, a3, s5
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: and a2, a4, s5
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: and a4, a5, s5
-; RV32I-NEXT: slli a3, a3, 4
-; RV32I-NEXT: srli a5, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: and a2, a5, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: srli a4, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: srli a5, a3, 2
-; RV32I-NEXT: and a3, a3, s4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: and a2, a4, s4
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: and a4, a5, s4
-; RV32I-NEXT: slli a3, a3, 2
-; RV32I-NEXT: srli a5, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: and a2, a5, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: srli a4, a1, 1
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: srli a5, a3, 1
-; RV32I-NEXT: and a3, a3, s3
-; RV32I-NEXT: or a6, a2, a0
-; RV32I-NEXT: sw a6, 144(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a0, a4, s3
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: and a2, a5, s3
-; RV32I-NEXT: slli a3, a3, 1
-; RV32I-NEXT: srli a4, a6, 1
-; RV32I-NEXT: sw a4, 140(sp) # 4-byte Folded Spill
-; RV32I-NEXT: or s2, a0, a1
-; RV32I-NEXT: or s7, a2, a3
-; RV32I-NEXT: slli a0, a4, 31
-; RV32I-NEXT: srli a1, s2, 8
-; RV32I-NEXT: srli a2, s2, 24
-; RV32I-NEXT: slli a3, s2, 24
-; RV32I-NEXT: and a4, s2, s6
-; RV32I-NEXT: or a0, s8, a0
-; RV32I-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
-; RV32I-NEXT: srli a0, s7, 8
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: srli a2, s7, 24
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: slli a4, s7, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: and a2, s7, s6
-; RV32I-NEXT: slli a2, a2, 8
-; RV32I-NEXT: or a2, a4, a2
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: srli a2, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: srli a3, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a2, a2, s5
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: and a3, a3, s5
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: lw a4, 80(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a6
+; RV32I-NEXT: lw a6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, s6
+; RV32I-NEXT: lw s6, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s6, s9
+; RV32I-NEXT: sw s6, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 668(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 580(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 460(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 400(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 508(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 720(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 500(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 720(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 432(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s6, s9
+; RV32I-NEXT: sw s6, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 564(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 468(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 404(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 504(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 728(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 440(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 728(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s6, s10
+; RV32I-NEXT: sw s6, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, s11, s6
+; RV32I-NEXT: lw s6, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, ra, s6
+; RV32I-NEXT: lw s6, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 484(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 532(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, s9, s6
+; RV32I-NEXT: lw s6, 408(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 512(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 736(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 444(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, s6
+; RV32I-NEXT: sw s6, 736(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s6, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, s6, s4
+; RV32I-NEXT: sw s4, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s4, 300(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, s5, s4
+; RV32I-NEXT: lw s4, 232(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, s7, s4
+; RV32I-NEXT: sw s4, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s4, 168(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, s0, s4
+; RV32I-NEXT: lw s0, 96(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s1, s0
+; RV32I-NEXT: sw s0, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s2, s0
+; RV32I-NEXT: sw s0, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 724(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s3, s0
+; RV32I-NEXT: sw s0, 724(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor s0, s8, t0
+; RV32I-NEXT: lw t0, 308(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t1, t0
+; RV32I-NEXT: lw t0, 244(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, t2, t0
+; RV32I-NEXT: lw t0, 172(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, t0
+; RV32I-NEXT: lw t0, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, t0
+; RV32I-NEXT: lw t0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, t0
+; RV32I-NEXT: lw t0, 732(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t6, t0
+; RV32I-NEXT: sw t0, 732(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a7, a7, a0
+; RV32I-NEXT: lw a0, 328(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, a1, a0
+; RV32I-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a0
+; RV32I-NEXT: lw a0, 116(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, a0
+; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a5, a0
+; RV32I-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a6, a0
+; RV32I-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a1, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a5, a1, 4
+; RV32I-NEXT: lw a0, 760(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a6, a1, a0
+; RV32I-NEXT: and a5, a5, a0
+; RV32I-NEXT: slli a6, a6, 4
+; RV32I-NEXT: or a1, a5, a6
+; RV32I-NEXT: sw a1, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a1, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, a1, 4
+; RV32I-NEXT: and t0, a1, a0
+; RV32I-NEXT: and a6, a6, a0
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: slli t0, t0, 4
+; RV32I-NEXT: or a0, a6, t0
+; RV32I-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a0, a2
+; RV32I-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, a2, a0
+; RV32I-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a2, a0
+; RV32I-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, s11
+; RV32I-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, ra, a0
+; RV32I-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, a2, a0
+; RV32I-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s10, a0
+; RV32I-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, a2, a0
+; RV32I-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli s6, a0, 4
+; RV32I-NEXT: and s10, a0, a1
+; RV32I-NEXT: and s6, s6, a1
+; RV32I-NEXT: slli s10, s10, 4
+; RV32I-NEXT: or a0, s6, s10
+; RV32I-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli s10, a0, 4
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: and s10, s10, a1
; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: srli a2, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: srli a3, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a2, a2, s4
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: and a3, a3, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: srli a2, a1, 1
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: srli a3, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: and a3, a3, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or s0, a2, a1
-; RV32I-NEXT: or s8, a3, a0
-; RV32I-NEXT: andi a1, s8, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, a0, s1
-; RV32I-NEXT: andi a1, s8, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: andi a1, s8, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: andi a1, s8, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: and a1, s8, s11
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: lui s10, 16
-; RV32I-NEXT: and a1, s8, s10
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lw a3, 152(sp) # 4-byte Folded Reload
-; RV32I-NEXT: srli a1, a3, 8
-; RV32I-NEXT: xor a0, s9, a0
-; RV32I-NEXT: srli a2, a3, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: and a2, a3, s6
-; RV32I-NEXT: slli a3, a3, 24
+; RV32I-NEXT: or a0, s10, a0
+; RV32I-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, a0, s5
+; RV32I-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, a1, a0
+; RV32I-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, s4, a0
+; RV32I-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, a1, a0
+; RV32I-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, a1, a0
+; RV32I-NEXT: xor s4, s0, t1
+; RV32I-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, t2, a0
+; RV32I-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, a0
+; RV32I-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, a0
+; RV32I-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, a0
+; RV32I-NEXT: xor t6, a7, t6
+; RV32I-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a1, a0
+; RV32I-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a3, a0
+; RV32I-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a4, a0
+; RV32I-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a1, a0
+; RV32I-NEXT: xor s9, a5, s9
+; RV32I-NEXT: lw a0, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a6, a0
+; RV32I-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a0
+; RV32I-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a1, a0
+; RV32I-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a0, a3
+; RV32I-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: lw a2, 452(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, s10, a2
+; RV32I-NEXT: lw s10, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, s10, ra
+; RV32I-NEXT: lw s10, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, s7, s10
+; RV32I-NEXT: lw s10, 560(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s8, s10
+; RV32I-NEXT: lw s10, 464(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, s11, s10
+; RV32I-NEXT: xor s5, s6, s5
+; RV32I-NEXT: lw s6, 272(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, s1, s6
+; RV32I-NEXT: lw s6, 176(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s2, s6
+; RV32I-NEXT: lw s6, 84(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s3, s6
+; RV32I-NEXT: xor s0, s4, s0
+; RV32I-NEXT: lw s4, 276(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, s4
+; RV32I-NEXT: lw s4, 180(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, s4
+; RV32I-NEXT: lw s4, 88(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, s4
+; RV32I-NEXT: xor t0, t6, t0
+; RV32I-NEXT: lw t6, 284(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t1, t6
+; RV32I-NEXT: lw t6, 184(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, t2, t6
+; RV32I-NEXT: lw t6, 92(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, t6
+; RV32I-NEXT: xor a4, s9, a4
+; RV32I-NEXT: lw t6, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, a5, t6
+; RV32I-NEXT: lw a5, 472(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, a5
+; RV32I-NEXT: xor a0, a3, a0
+; RV32I-NEXT: lw a3, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a3
+; RV32I-NEXT: lw a3, 476(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: xor a3, ra, s7
+; RV32I-NEXT: lw a5, 576(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, s8, a5
+; RV32I-NEXT: lw a5, 480(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s11, a5
+; RV32I-NEXT: xor s1, s5, s1
+; RV32I-NEXT: lw a5, 196(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s2, a5
+; RV32I-NEXT: lw a5, 100(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s3, a5
+; RV32I-NEXT: xor t3, s0, t3
+; RV32I-NEXT: lw a5, 200(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, a5
+; RV32I-NEXT: lw a5, 104(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, a5
+; RV32I-NEXT: xor t0, t0, t1
+; RV32I-NEXT: lw a5, 208(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t2, a5
+; RV32I-NEXT: lw a5, 112(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, a5
+; RV32I-NEXT: lw t2, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a5, t2, 2
+; RV32I-NEXT: lw s5, 740(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and t2, t2, s5
+; RV32I-NEXT: and a5, a5, s5
+; RV32I-NEXT: slli t2, t2, 2
+; RV32I-NEXT: or t2, a5, t2
+; RV32I-NEXT: lw s0, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a5, s0, 2
+; RV32I-NEXT: and s0, s0, s5
+; RV32I-NEXT: and a5, a5, s5
+; RV32I-NEXT: slli s0, s0, 2
+; RV32I-NEXT: or a5, a5, s0
+; RV32I-NEXT: xor a4, a4, t6
+; RV32I-NEXT: lw t6, 492(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, a6, t6
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: xor a2, a3, s4
+; RV32I-NEXT: lw a3, 496(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, s6, a3
+; RV32I-NEXT: lw s0, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, s0, 2
+; RV32I-NEXT: and s0, s0, s5
+; RV32I-NEXT: and a6, a6, s5
+; RV32I-NEXT: slli s0, s0, 2
+; RV32I-NEXT: or s0, a6, s0
+; RV32I-NEXT: lw s4, 700(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, s4, 2
+; RV32I-NEXT: and s4, s4, s5
+; RV32I-NEXT: and a6, a6, s5
+; RV32I-NEXT: slli s4, s4, 2
+; RV32I-NEXT: or s4, a6, s4
+; RV32I-NEXT: xor s1, s1, s2
+; RV32I-NEXT: lw a6, 132(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s3, a6
+; RV32I-NEXT: xor t3, t3, t4
+; RV32I-NEXT: lw a6, 124(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t5, a6
+; RV32I-NEXT: xor t0, t0, t1
+; RV32I-NEXT: lw a6, 136(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, a6
+; RV32I-NEXT: xor a4, a4, t6
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: xor a1, s1, s2
+; RV32I-NEXT: xor a3, t3, t4
+; RV32I-NEXT: xor a7, t0, a7
+; RV32I-NEXT: lw a6, 720(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, a6
+; RV32I-NEXT: lw a6, 728(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a6
+; RV32I-NEXT: lw a6, 736(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a6
+; RV32I-NEXT: lw a6, 724(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a1, a6
+; RV32I-NEXT: lw a1, 732(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a1
+; RV32I-NEXT: lw a1, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, a1
+; RV32I-NEXT: srli a1, t2, 1
+; RV32I-NEXT: lw s2, 744(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and t1, t2, s2
+; RV32I-NEXT: and a1, a1, s2
+; RV32I-NEXT: slli t1, t1, 1
+; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: xor a0, a0, a4
+; RV32I-NEXT: sw a0, 736(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a0, s0, 1
+; RV32I-NEXT: and a4, s0, s2
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: or t2, a0, a4
+; RV32I-NEXT: xor a0, a3, t0
+; RV32I-NEXT: sw a0, 732(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a0, a2, 8
+; RV32I-NEXT: lw a6, 756(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a0, a0, a6
+; RV32I-NEXT: srli a3, a2, 24
+; RV32I-NEXT: or a3, a0, a3
+; RV32I-NEXT: slli a0, a2, 24
+; RV32I-NEXT: and a2, a2, a6
; RV32I-NEXT: slli a2, a2, 8
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a3, a0, 8
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: and a3, a3, s6
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: slli a3, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: and a2, a3, s5
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: srli a3, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: and a2, a3, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: and a2, a3, s4
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: srli a3, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: and a2, a3, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: and a2, a3, s3
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srli a3, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: or s1, a2, a1
-; RV32I-NEXT: lw a1, 168(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a3, a1
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: andi a1, s7, 2
-; RV32I-NEXT: sw a1, 152(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 1
-; RV32I-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s8, a0, s0
-; RV32I-NEXT: andi a1, s7, 4
-; RV32I-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 8
-; RV32I-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: andi a1, s7, 16
-; RV32I-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 32
-; RV32I-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 64
-; RV32I-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: andi a1, s7, 128
-; RV32I-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 256
-; RV32I-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 512
-; RV32I-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 1024
-; RV32I-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: and a1, s7, s11
-; RV32I-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: and a1, s7, s10
-; RV32I-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and s10, s7, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s10
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and s9, s7, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s9
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: lw a2, 156(sp) # 4-byte Folded Reload
-; RV32I-NEXT: srli a1, a2, 8
-; RV32I-NEXT: xor s7, s8, a0
-; RV32I-NEXT: srli a0, a2, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: and a1, a2, s6
-; RV32I-NEXT: slli a2, a2, 24
-; RV32I-NEXT: slli a1, a1, 8
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or s8, a1, a0
-; RV32I-NEXT: andi a1, s8, 2
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, a0, s1
-; RV32I-NEXT: andi a1, s8, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s8, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s8, 128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 256
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 512
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 1024
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: and a1, s8, s11
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: or a2, a0, a2
+; RV32I-NEXT: srli a0, a7, 8
+; RV32I-NEXT: and a0, a0, a6
+; RV32I-NEXT: srli a4, a7, 24
+; RV32I-NEXT: or a4, a0, a4
+; RV32I-NEXT: slli a0, a7, 24
+; RV32I-NEXT: and a7, a7, a6
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a7, a0, a7
+; RV32I-NEXT: srli a0, a5, 1
+; RV32I-NEXT: and a5, a5, s2
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: or a0, a0, a5
+; RV32I-NEXT: srli a5, a5, 31
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli t0, a1, 31
+; RV32I-NEXT: and a5, a5, t0
+; RV32I-NEXT: sw a5, 720(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: sw a2, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a2, s4, 1
+; RV32I-NEXT: and a3, s4, s2
+; RV32I-NEXT: and a2, a2, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or t4, a2, a3
+; RV32I-NEXT: srli a3, a3, 31
+; RV32I-NEXT: seqz a2, a3
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 31
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 728(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a2, a7, a4
+; RV32I-NEXT: sw a2, 724(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 1
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 4
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 2
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 8
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 3
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 16
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 4
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 32
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 5
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 64
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 6
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 700(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 128
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 7
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 256
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 8
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 668(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 512
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 9
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 1024
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 10
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a4, 456(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a2, a0, a4
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 11
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 652(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 1
+; RV32I-NEXT: and a2, a0, a5
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 12
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a6, 2
+; RV32I-NEXT: and a2, a0, a6
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 13
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a7, 4
+; RV32I-NEXT: and a2, a0, a7
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 14
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t0, 8
+; RV32I-NEXT: and a2, a0, t0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 15
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t1, 16
+; RV32I-NEXT: and a2, a0, t1
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 16
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s3, 32
+; RV32I-NEXT: and a2, a0, s3
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 17
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t5, 64
+; RV32I-NEXT: and a2, a0, t5
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 18
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t6, 128
+; RV32I-NEXT: and a2, a0, t6
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 19
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s0, 256
+; RV32I-NEXT: and a2, a0, s0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 20
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s4, 512
+; RV32I-NEXT: and a2, a0, s4
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 21
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s6, 1024
+; RV32I-NEXT: and a2, a0, s6
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 22
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s8, 2048
+; RV32I-NEXT: and a2, a0, s8
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 23
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s10, 4096
+; RV32I-NEXT: and a2, a0, s10
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 24
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 8192
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 25
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 16384
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 26
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 32768
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 27
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 65536
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 28
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 131072
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, a1, 29
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 1
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a2, a2, a1
+; RV32I-NEXT: sw a2, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a1, a1, 30
+; RV32I-NEXT: lui t3, 262144
+; RV32I-NEXT: and a0, a0, t3
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t4, 2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t4, 4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 2
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t4, 8
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 3
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t4, 16
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 4
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t4, 32
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 5
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t4, 64
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 6
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t4, 128
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 7
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t4, 256
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 8
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t4, 512
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 9
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t4, 1024
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 10
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t4, a4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 11
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t4, a5
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 12
+; RV32I-NEXT: and s9, a0, a1
+; RV32I-NEXT: and a0, t4, a6
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli s1, t2, 13
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t4, a7
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 14
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t4, t0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 15
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t4, t1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a4, t2, 16
+; RV32I-NEXT: and s7, a0, a4
+; RV32I-NEXT: and a0, t4, s3
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a2, t2, 17
+; RV32I-NEXT: and s3, a0, a2
+; RV32I-NEXT: and a0, t4, t5
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 18
+; RV32I-NEXT: and s11, a0, a1
+; RV32I-NEXT: and a0, t4, t6
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 19
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t4, s0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 20
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t4, s4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli s0, t2, 21
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t4, s6
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t2, 22
+; RV32I-NEXT: and s0, a0, a1
+; RV32I-NEXT: and a1, t4, s8
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a3, t2, 23
+; RV32I-NEXT: and t6, a1, a3
+; RV32I-NEXT: and a2, t4, s10
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a3, t2, 24
+; RV32I-NEXT: and s1, a2, a3
; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and a3, t4, a0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli a5, t2, 25
+; RV32I-NEXT: and s4, a3, a5
; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and a5, t4, a0
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a7, t2, 26
+; RV32I-NEXT: and s6, a5, a7
; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and a7, t4, a0
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: slli t0, t2, 27
+; RV32I-NEXT: and s8, a7, t0
; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
+; RV32I-NEXT: and t0, t4, a0
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: slli t1, t2, 28
+; RV32I-NEXT: and s10, t0, t1
; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor a0, a0, s7
-; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: xor a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: slli a3, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: lw a2, 168(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli s7, a0, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 152(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, a0, s1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 132(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s10
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s9
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lw a1, 140(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: lw a2, 144(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: slli a2, a2, 1
-; RV32I-NEXT: srli a3, a0, 8
-; RV32I-NEXT: srli a4, a0, 24
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: slli a2, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: and a3, a3, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: srli a1, a1, 1
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: srli a2, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a2, a2, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: srli a2, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a2, a2, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: srli a2, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: slli a3, a2, 31
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a3, s7, a3
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: lw a2, 160(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw a4, 148(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a4, 0(a2)
-; RV32I-NEXT: sw a1, 4(a2)
-; RV32I-NEXT: sw a3, 8(a2)
-; RV32I-NEXT: sw a0, 12(a2)
-; RV32I-NEXT: lw a2, 164(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a4, 0(a2)
-; RV32I-NEXT: sw a1, 4(a2)
-; RV32I-NEXT: sw a3, 8(a2)
-; RV32I-NEXT: sw a0, 12(a2)
-; RV32I-NEXT: lw ra, 220(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 216(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 212(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 208(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 204(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 200(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 196(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 192(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 188(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 184(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s9, 180(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s10, 176(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s11, 172(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 224
+; RV32I-NEXT: and t1, t4, a0
+; RV32I-NEXT: seqz t1, t1
+; RV32I-NEXT: addi t1, t1, -1
+; RV32I-NEXT: slli a6, t2, 29
+; RV32I-NEXT: and t5, t1, a6
+; RV32I-NEXT: and t1, t4, t3
+; RV32I-NEXT: andi t4, t4, 1
+; RV32I-NEXT: seqz t4, t4
+; RV32I-NEXT: addi t4, t4, -1
+; RV32I-NEXT: and t4, t4, t2
+; RV32I-NEXT: slli t2, t2, 30
+; RV32I-NEXT: seqz t1, t1
+; RV32I-NEXT: addi t1, t1, -1
+; RV32I-NEXT: and t1, t1, t2
+; RV32I-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a1, a0
+; RV32I-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw a2, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: lw a3, 652(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a4, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: lw a4, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a5, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a6, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a6
+; RV32I-NEXT: lw a6, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a7, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, a7
+; RV32I-NEXT: lw a7, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, a7
+; RV32I-NEXT: lw a7, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t0, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t3, 564(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, t3
+; RV32I-NEXT: lw t3, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw ra, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, ra
+; RV32I-NEXT: lw ra, 532(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, ra, s9
+; RV32I-NEXT: xor s3, s7, s3
+; RV32I-NEXT: xor t6, s0, t6
+; RV32I-NEXT: xor t1, t5, t1
+; RV32I-NEXT: xor a0, t2, a0
+; RV32I-NEXT: lw t2, 700(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, t2
+; RV32I-NEXT: lw t2, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, t2
+; RV32I-NEXT: lw t2, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, t2
+; RV32I-NEXT: lw t2, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a4, t2
+; RV32I-NEXT: lw a4, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a4
+; RV32I-NEXT: lw a4, 720(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, a4
+; RV32I-NEXT: lw t5, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a4, t5, 4
+; RV32I-NEXT: lw s7, 760(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and t5, t5, s7
+; RV32I-NEXT: and a4, a4, s7
+; RV32I-NEXT: slli t5, t5, 4
+; RV32I-NEXT: or t5, a4, t5
+; RV32I-NEXT: xor a7, t4, a7
+; RV32I-NEXT: lw a4, 580(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, a4
+; RV32I-NEXT: lw a4, 560(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, a4
+; RV32I-NEXT: lw a4, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, s9, a4
+; RV32I-NEXT: xor s0, s3, s11
+; RV32I-NEXT: xor t6, t6, s1
+; RV32I-NEXT: lw a4, 728(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, t1, a4
+; RV32I-NEXT: lw s1, 724(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli t1, s1, 4
+; RV32I-NEXT: and s1, s1, s7
+; RV32I-NEXT: and t1, t1, s7
+; RV32I-NEXT: slli s1, s1, 4
+; RV32I-NEXT: or t1, t1, s1
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: lw a2, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a3, a2
+; RV32I-NEXT: lw a3, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, t2, a3
+; RV32I-NEXT: lw t2, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t2
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 576(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t3, t0
+; RV32I-NEXT: lw t2, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, t4, t2
+; RV32I-NEXT: lw t3, 524(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, s0, t3
+; RV32I-NEXT: xor t4, t6, s4
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: lw a2, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a3, a2
+; RV32I-NEXT: lw a3, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a5, a3
+; RV32I-NEXT: xor a5, a7, t0
+; RV32I-NEXT: lw a7, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, t2, a7
+; RV32I-NEXT: lw t0, 528(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t3, t0
+; RV32I-NEXT: xor t2, t4, s6
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: lw a2, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a3, a2
+; RV32I-NEXT: srli a3, t5, 2
+; RV32I-NEXT: and t3, t5, s5
+; RV32I-NEXT: and a3, a3, s5
+; RV32I-NEXT: slli t3, t3, 2
+; RV32I-NEXT: or a3, a3, t3
+; RV32I-NEXT: xor a5, a5, a7
+; RV32I-NEXT: lw a7, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, t0, a7
+; RV32I-NEXT: xor t0, t2, s8
+; RV32I-NEXT: srli t2, t1, 2
+; RV32I-NEXT: and t1, t1, s5
+; RV32I-NEXT: and t2, t2, s5
+; RV32I-NEXT: slli t1, t1, 2
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a1
+; RV32I-NEXT: xor a5, a5, a7
+; RV32I-NEXT: xor a7, t0, s10
+; RV32I-NEXT: srli a1, a3, 1
+; RV32I-NEXT: xor a2, a0, a2
+; RV32I-NEXT: srli a0, t1, 1
+; RV32I-NEXT: and a3, a3, s2
+; RV32I-NEXT: and t0, t1, s2
+; RV32I-NEXT: xor a5, a5, a7
+; RV32I-NEXT: and a7, a1, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: xor a6, a2, a6
+; RV32I-NEXT: and t1, a0, s2
+; RV32I-NEXT: slli t0, t0, 1
+; RV32I-NEXT: xor a4, a5, a4
+; RV32I-NEXT: or a2, a7, a3
+; RV32I-NEXT: or a3, t1, t0
+; RV32I-NEXT: srli a5, a6, 8
+; RV32I-NEXT: srli a7, a6, 24
+; RV32I-NEXT: slli t0, a6, 24
+; RV32I-NEXT: lw t2, 756(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a6, a6, t2
+; RV32I-NEXT: and a5, a5, t2
+; RV32I-NEXT: or a5, a5, a7
+; RV32I-NEXT: srli a7, a4, 8
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a6, t0, a6
+; RV32I-NEXT: srli t0, a4, 24
+; RV32I-NEXT: and a7, a7, t2
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: slli t0, a4, 24
+; RV32I-NEXT: and a4, a4, t2
+; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: or t0, t0, a4
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: lui a4, 349525
+; RV32I-NEXT: addi a4, a4, 1364
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: srli a7, a5, 4
+; RV32I-NEXT: and a5, a5, s7
+; RV32I-NEXT: and a7, a7, s7
+; RV32I-NEXT: slli a5, a5, 4
+; RV32I-NEXT: or a5, a7, a5
+; RV32I-NEXT: srli a7, a6, 4
+; RV32I-NEXT: and a6, a6, s7
+; RV32I-NEXT: and a7, a7, s7
+; RV32I-NEXT: slli a6, a6, 4
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: srli a7, a5, 2
+; RV32I-NEXT: and a5, a5, s5
+; RV32I-NEXT: and a7, a7, s5
+; RV32I-NEXT: slli a5, a5, 2
+; RV32I-NEXT: or a5, a7, a5
+; RV32I-NEXT: srli a7, a6, 2
+; RV32I-NEXT: and a6, a6, s5
+; RV32I-NEXT: and a7, a7, s5
+; RV32I-NEXT: slli a6, a6, 2
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: srli a7, a5, 1
+; RV32I-NEXT: and a5, a5, s2
+; RV32I-NEXT: and a7, a7, a4
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: or a5, a7, a5
+; RV32I-NEXT: srli a7, a6, 1
+; RV32I-NEXT: and a6, a6, s2
+; RV32I-NEXT: and a7, a7, a4
+; RV32I-NEXT: slli a6, a6, 1
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: srli a5, a5, 1
+; RV32I-NEXT: srli a6, a6, 1
+; RV32I-NEXT: lw a7, 736(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a7
+; RV32I-NEXT: lw a7, 732(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, a7
+; RV32I-NEXT: srli a7, a5, 8
+; RV32I-NEXT: srli t0, a5, 24
+; RV32I-NEXT: slli t1, a5, 24
+; RV32I-NEXT: and a5, a5, t2
+; RV32I-NEXT: and a7, a7, t2
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: srli t0, a6, 8
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: or a5, t1, a5
+; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: and t0, t0, t2
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: and t1, a6, t2
+; RV32I-NEXT: slli a6, a6, 24
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or a6, a6, t1
+; RV32I-NEXT: or a5, a5, a7
+; RV32I-NEXT: or a6, a6, t0
+; RV32I-NEXT: srli a7, a5, 4
+; RV32I-NEXT: and a5, a5, s7
+; RV32I-NEXT: srli t0, a6, 4
+; RV32I-NEXT: and a6, a6, s7
+; RV32I-NEXT: and a7, a7, s7
+; RV32I-NEXT: and t0, t0, s7
+; RV32I-NEXT: slli a5, a5, 4
+; RV32I-NEXT: slli a6, a6, 4
+; RV32I-NEXT: or a5, a7, a5
+; RV32I-NEXT: or a6, t0, a6
+; RV32I-NEXT: srli a7, a5, 2
+; RV32I-NEXT: and a5, a5, s5
+; RV32I-NEXT: srli t0, a6, 2
+; RV32I-NEXT: and a6, a6, s5
+; RV32I-NEXT: and a7, a7, s5
+; RV32I-NEXT: and t0, t0, s5
+; RV32I-NEXT: slli a5, a5, 2
+; RV32I-NEXT: or a5, a7, a5
+; RV32I-NEXT: slli a6, a6, 2
+; RV32I-NEXT: or a6, t0, a6
+; RV32I-NEXT: srli a7, a5, 1
+; RV32I-NEXT: and a5, a5, s2
+; RV32I-NEXT: and t0, a6, s2
+; RV32I-NEXT: srli a6, a6, 1
+; RV32I-NEXT: and a7, a7, a4
+; RV32I-NEXT: and a4, a6, a4
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: or a5, a7, a5
+; RV32I-NEXT: slli t0, t0, 1
+; RV32I-NEXT: or a4, a4, t0
+; RV32I-NEXT: slli a1, a1, 31
+; RV32I-NEXT: srli a5, a5, 1
+; RV32I-NEXT: or a1, a5, a1
+; RV32I-NEXT: slli a0, a0, 31
+; RV32I-NEXT: srli a4, a4, 1
+; RV32I-NEXT: or a0, a4, a0
+; RV32I-NEXT: srli a2, a2, 1
+; RV32I-NEXT: srli a3, a3, 1
+; RV32I-NEXT: lw a4, 748(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a1, 0(a4)
+; RV32I-NEXT: sw a2, 4(a4)
+; RV32I-NEXT: sw a0, 8(a4)
+; RV32I-NEXT: sw a3, 12(a4)
+; RV32I-NEXT: lw a4, 752(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a1, 0(a4)
+; RV32I-NEXT: sw a2, 4(a4)
+; RV32I-NEXT: sw a0, 8(a4)
+; RV32I-NEXT: sw a3, 12(a4)
+; RV32I-NEXT: lw ra, 812(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 808(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 804(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 800(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 796(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 792(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 788(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 784(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 780(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 776(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 772(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 768(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 764(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 816
; RV32I-NEXT: ret
;
; RV64I-LABEL: commutative_clmulh_v2i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -400
-; RV64I-NEXT: sd ra, 392(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 384(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 376(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 368(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 360(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 352(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 344(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 336(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 328(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 320(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 312(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s10, 304(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s11, 296(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a5, 288(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a4, 280(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s3, a3
-; RV64I-NEXT: mv s2, a1
-; RV64I-NEXT: srli a1, a2, 24
-; RV64I-NEXT: lui t5, 4080
-; RV64I-NEXT: srli a3, a2, 8
-; RV64I-NEXT: li s11, 255
-; RV64I-NEXT: srli a4, a2, 40
-; RV64I-NEXT: lui t6, 16
-; RV64I-NEXT: srli a5, a2, 56
-; RV64I-NEXT: srliw a6, a2, 24
-; RV64I-NEXT: lui a7, 61681
-; RV64I-NEXT: lui t0, 209715
-; RV64I-NEXT: lui t1, 349525
-; RV64I-NEXT: srli t2, a0, 24
-; RV64I-NEXT: srli t3, a0, 8
-; RV64I-NEXT: srli t4, a0, 40
-; RV64I-NEXT: and a1, a1, t5
-; RV64I-NEXT: slli s11, s11, 24
+; RV64I-NEXT: addi sp, sp, -1120
+; RV64I-NEXT: sd ra, 1112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 1104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 1096(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 1088(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 1080(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 1072(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 1064(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 1056(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 1048(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 1040(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 1032(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 1024(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 1016(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a5, 1008(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a4, 1000(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv a4, a0
+; RV64I-NEXT: srli a5, a2, 24
+; RV64I-NEXT: lui s3, 4080
+; RV64I-NEXT: srli a6, a2, 8
+; RV64I-NEXT: li t4, 255
+; RV64I-NEXT: srli t0, a2, 40
+; RV64I-NEXT: lui a0, 16
+; RV64I-NEXT: srli t1, a2, 56
+; RV64I-NEXT: srliw t2, a2, 24
+; RV64I-NEXT: slli a7, a2, 56
+; RV64I-NEXT: lui s6, 61681
+; RV64I-NEXT: lui t6, 209715
+; RV64I-NEXT: lui s4, 349525
+; RV64I-NEXT: srli s5, a4, 24
+; RV64I-NEXT: srli t3, a4, 8
+; RV64I-NEXT: srli t5, a4, 40
+; RV64I-NEXT: srli s2, a4, 56
+; RV64I-NEXT: srliw s0, a4, 24
+; RV64I-NEXT: slli ra, a4, 56
+; RV64I-NEXT: srli s8, a3, 24
+; RV64I-NEXT: srli s10, a3, 8
+; RV64I-NEXT: srli s7, a3, 40
+; RV64I-NEXT: srli s9, a3, 56
+; RV64I-NEXT: and a5, a5, s3
+; RV64I-NEXT: slli s1, t4, 24
+; RV64I-NEXT: and a6, a6, s1
+; RV64I-NEXT: or a6, a6, a5
+; RV64I-NEXT: addi a0, a0, -256
+; RV64I-NEXT: and a5, t0, a0
+; RV64I-NEXT: or t0, a5, t1
+; RV64I-NEXT: and a5, a2, s3
+; RV64I-NEXT: slli t2, t2, 32
+; RV64I-NEXT: addi t1, s6, -241
+; RV64I-NEXT: addi s11, t6, 819
+; RV64I-NEXT: addi t4, s4, 1365
+; RV64I-NEXT: slli a5, a5, 24
+; RV64I-NEXT: or a5, a5, t2
+; RV64I-NEXT: slli s4, t1, 32
+; RV64I-NEXT: add s4, t1, s4
+; RV64I-NEXT: slli t1, s11, 32
+; RV64I-NEXT: add s11, s11, t1
+; RV64I-NEXT: slli t1, t4, 32
+; RV64I-NEXT: add s6, t4, t1
+; RV64I-NEXT: srliw t2, a3, 24
+; RV64I-NEXT: and t1, s5, s3
+; RV64I-NEXT: and t3, t3, s1
+; RV64I-NEXT: or t1, t3, t1
+; RV64I-NEXT: srli t3, a1, 24
+; RV64I-NEXT: and t4, t5, a0
+; RV64I-NEXT: or t4, t4, s2
+; RV64I-NEXT: and t5, a4, s3
+; RV64I-NEXT: slli s0, s0, 32
+; RV64I-NEXT: slli t5, t5, 24
+; RV64I-NEXT: or t6, t5, s0
+; RV64I-NEXT: srli t5, a1, 8
+; RV64I-NEXT: and s0, s8, s3
+; RV64I-NEXT: mv s8, s1
+; RV64I-NEXT: sd s1, 992(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and s1, s10, s1
+; RV64I-NEXT: or s0, s1, s0
+; RV64I-NEXT: srli s1, a1, 40
+; RV64I-NEXT: and s2, s7, a0
+; RV64I-NEXT: or s2, s2, s9
+; RV64I-NEXT: and s5, a3, s3
+; RV64I-NEXT: slli t2, t2, 32
+; RV64I-NEXT: slli s5, s5, 24
+; RV64I-NEXT: or t2, s5, t2
+; RV64I-NEXT: srli s5, a1, 56
+; RV64I-NEXT: and t3, t3, s3
+; RV64I-NEXT: and t5, t5, s8
+; RV64I-NEXT: or t3, t5, t3
+; RV64I-NEXT: srliw t5, a1, 24
+; RV64I-NEXT: and s1, s1, a0
+; RV64I-NEXT: or s1, s1, s5
+; RV64I-NEXT: and s5, a1, s3
+; RV64I-NEXT: slli t5, t5, 32
+; RV64I-NEXT: slli s5, s5, 24
+; RV64I-NEXT: or s5, s5, t5
+; RV64I-NEXT: li t5, 1
+; RV64I-NEXT: or a6, a6, t0
+; RV64I-NEXT: slli t0, a3, 56
+; RV64I-NEXT: mv s3, a0
+; RV64I-NEXT: sd a0, 976(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a2, a0
+; RV64I-NEXT: slli a2, a2, 40
+; RV64I-NEXT: or a2, a7, a2
+; RV64I-NEXT: slli a7, a1, 56
+; RV64I-NEXT: or t1, t1, t4
+; RV64I-NEXT: slli s7, t5, 11
+; RV64I-NEXT: sd s7, 440(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, a4, a0
+; RV64I-NEXT: and a3, a3, s3
+; RV64I-NEXT: and a1, a1, s3
+; RV64I-NEXT: slli a0, a0, 40
+; RV64I-NEXT: slli a3, a3, 40
+; RV64I-NEXT: slli a1, a1, 40
+; RV64I-NEXT: or a0, ra, a0
+; RV64I-NEXT: or a4, s0, s2
+; RV64I-NEXT: or a3, t0, a3
+; RV64I-NEXT: or t0, t3, s1
+; RV64I-NEXT: or a1, a7, a1
+; RV64I-NEXT: or a2, a2, a5
+; RV64I-NEXT: or a0, a0, t6
+; RV64I-NEXT: or a3, a3, t2
+; RV64I-NEXT: or a1, a1, s5
+; RV64I-NEXT: or a2, a2, a6
+; RV64I-NEXT: or a0, a0, t1
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: or a1, a1, t0
+; RV64I-NEXT: srli a4, a2, 4
+; RV64I-NEXT: sd s4, 984(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a2, s4
+; RV64I-NEXT: srli a5, a0, 4
+; RV64I-NEXT: and a0, a0, s4
+; RV64I-NEXT: srli a6, a3, 4
+; RV64I-NEXT: and a3, a3, s4
+; RV64I-NEXT: srli a7, a1, 4
+; RV64I-NEXT: and a1, a1, s4
+; RV64I-NEXT: and a4, a4, s4
+; RV64I-NEXT: slli a2, a2, 4
+; RV64I-NEXT: and a5, a5, s4
+; RV64I-NEXT: slli a0, a0, 4
+; RV64I-NEXT: and a6, a6, s4
+; RV64I-NEXT: slli a3, a3, 4
+; RV64I-NEXT: and a7, a7, s4
+; RV64I-NEXT: slli a1, a1, 4
+; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: or a0, a5, a0
+; RV64I-NEXT: or a3, a6, a3
+; RV64I-NEXT: or a1, a7, a1
+; RV64I-NEXT: srli a4, a2, 2
+; RV64I-NEXT: sd s11, 968(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a2, s11
+; RV64I-NEXT: srli a5, a0, 2
+; RV64I-NEXT: and a0, a0, s11
+; RV64I-NEXT: srli a6, a3, 2
; RV64I-NEXT: and a3, a3, s11
-; RV64I-NEXT: or a1, a3, a1
-; RV64I-NEXT: addi s10, t6, -256
-; RV64I-NEXT: and a3, a4, s10
-; RV64I-NEXT: or a3, a3, a5
-; RV64I-NEXT: and a4, a2, t5
-; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: addi s9, a7, -241
-; RV64I-NEXT: addi s8, t0, 819
-; RV64I-NEXT: addi s7, t1, 1365
-; RV64I-NEXT: slli a4, a4, 24
-; RV64I-NEXT: or a4, a4, a6
-; RV64I-NEXT: slli a5, s9, 32
-; RV64I-NEXT: add s9, s9, a5
-; RV64I-NEXT: slli a5, s8, 32
-; RV64I-NEXT: add s8, s8, a5
-; RV64I-NEXT: slli a5, s7, 32
-; RV64I-NEXT: add s7, s7, a5
+; RV64I-NEXT: srli a7, a1, 2
+; RV64I-NEXT: and a1, a1, s11
+; RV64I-NEXT: and a4, a4, s11
+; RV64I-NEXT: slli a2, a2, 2
+; RV64I-NEXT: and a5, a5, s11
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: and a6, a6, s11
+; RV64I-NEXT: slli a3, a3, 2
+; RV64I-NEXT: and a7, a7, s11
+; RV64I-NEXT: slli a1, a1, 2
+; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: or a0, a5, a0
+; RV64I-NEXT: or a3, a6, a3
+; RV64I-NEXT: or a1, a7, a1
+; RV64I-NEXT: srli a4, a2, 1
+; RV64I-NEXT: sd s6, 960(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a2, s6
+; RV64I-NEXT: srli a5, a0, 1
+; RV64I-NEXT: and a0, a0, s6
+; RV64I-NEXT: srli a6, a3, 1
+; RV64I-NEXT: and a3, a3, s6
+; RV64I-NEXT: srli a7, a1, 1
+; RV64I-NEXT: and a1, a1, s6
+; RV64I-NEXT: and a4, a4, s6
+; RV64I-NEXT: slli a2, a2, 1
+; RV64I-NEXT: and a5, a5, s6
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: and a6, a6, s6
+; RV64I-NEXT: slli t0, a3, 1
+; RV64I-NEXT: and a7, a7, s6
+; RV64I-NEXT: slli a3, a1, 1
+; RV64I-NEXT: or s0, a4, a2
+; RV64I-NEXT: or s1, a5, a0
+; RV64I-NEXT: srli a2, a0, 63
+; RV64I-NEXT: or a1, a6, t0
+; RV64I-NEXT: or t0, a7, a3
+; RV64I-NEXT: srli a3, a3, 63
+; RV64I-NEXT: slli a4, s0, 1
+; RV64I-NEXT: andi a5, s1, 2
+; RV64I-NEXT: slli a6, s0, 2
+; RV64I-NEXT: andi a7, s1, 4
+; RV64I-NEXT: slli a0, s0, 3
+; RV64I-NEXT: andi t1, s1, 8
+; RV64I-NEXT: slli t2, s0, 4
+; RV64I-NEXT: andi t3, s1, 16
+; RV64I-NEXT: slli t4, s0, 5
+; RV64I-NEXT: andi t6, s1, 32
+; RV64I-NEXT: slli s2, s0, 63
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a2, a2, s2
+; RV64I-NEXT: sd a2, 944(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, a1, 63
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 952(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, s0, 6
+; RV64I-NEXT: seqz a3, a5
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: sd a3, 456(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a3, s1, 64
+; RV64I-NEXT: seqz a4, a7
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, a6
+; RV64I-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a4, s0, 7
+; RV64I-NEXT: seqz a5, t1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: and a0, a5, a0
+; RV64I-NEXT: sd a0, 912(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a5, s1, 128
+; RV64I-NEXT: seqz a6, t3
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a0, a6, t2
+; RV64I-NEXT: sd a0, 896(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a6, s0, 8
+; RV64I-NEXT: seqz a7, t6
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: and a0, a7, t4
+; RV64I-NEXT: sd a0, 472(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a7, s1, 256
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 928(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, s0, 9
+; RV64I-NEXT: seqz a3, a5
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: sd a3, 464(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a3, s1, 512
+; RV64I-NEXT: seqz a4, a7
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a0, a4, a6
+; RV64I-NEXT: sd a0, 872(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a4, s0, 10
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 904(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a2, s1, 1024
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: sd a2, 936(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, s7
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 11
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 848(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 1
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 12
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 840(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 2
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 13
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 864(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 4
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 14
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 888(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 8
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 15
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 920(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 16
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 800(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 32
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 17
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 784(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 64
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 18
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 816(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 128
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 19
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 832(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 256
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 20
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 856(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 512
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 21
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 880(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 1024
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 22
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 736(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 2048
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 23
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 728(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 4096
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 24
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 760(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 8192
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 25
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 776(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 16384
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 26
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 792(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 32768
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 27
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 808(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 65536
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 28
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 824(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 131072
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 29
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 672(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 262144
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 30
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 656(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sraiw a2, s1, 31
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 31
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 696(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 32
+; RV64I-NEXT: sd a2, 432(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 32
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 712(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 33
+; RV64I-NEXT: sd a2, 424(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 33
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 720(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 34
+; RV64I-NEXT: sd a2, 416(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 34
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 744(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 35
+; RV64I-NEXT: sd a2, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 35
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 752(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 36
+; RV64I-NEXT: sd a2, 400(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 36
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 768(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 37
+; RV64I-NEXT: sd a2, 392(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 37
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 600(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 38
+; RV64I-NEXT: sd a2, 376(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 38
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 584(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 39
+; RV64I-NEXT: sd a2, 368(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 39
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 632(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 40
+; RV64I-NEXT: sd a2, 360(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 40
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 640(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 41
+; RV64I-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 41
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 648(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli ra, t5, 42
+; RV64I-NEXT: and a2, s1, ra
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 42
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 664(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a3, t5, 43
+; RV64I-NEXT: and a2, s1, a3
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a4, s0, 43
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: sd a2, 680(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 44
+; RV64I-NEXT: and a4, s1, a2
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 44
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 688(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s11, t5, 45
+; RV64I-NEXT: and a4, s1, s11
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 45
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 704(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s8, t5, 46
+; RV64I-NEXT: and a4, s1, s8
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 46
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 544(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s7, t5, 47
+; RV64I-NEXT: and a4, s1, s7
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 47
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 536(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s6, t5, 48
+; RV64I-NEXT: and a4, s1, s6
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 48
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 552(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s5, t5, 49
+; RV64I-NEXT: and a4, s1, s5
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 49
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 560(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s4, t5, 50
+; RV64I-NEXT: and a4, s1, s4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 50
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 568(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s3, t5, 51
+; RV64I-NEXT: and a4, s1, s3
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 51
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 576(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t1, t5, 52
+; RV64I-NEXT: and a4, s1, t1
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 52
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 592(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a7, t5, 53
+; RV64I-NEXT: and a4, s1, a7
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 53
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 608(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a6, t5, 54
+; RV64I-NEXT: and a4, s1, a6
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 54
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 624(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, t5, 55
+; RV64I-NEXT: and a4, s1, a0
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 55
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 616(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s2, t5, 56
+; RV64I-NEXT: and a4, s1, s2
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 56
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 488(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t6, t5, 57
+; RV64I-NEXT: and a4, s1, t6
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 57
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 480(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t4, t5, 58
+; RV64I-NEXT: and a4, s1, t4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 58
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 496(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t3, t5, 59
+; RV64I-NEXT: and a4, s1, t3
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 59
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 504(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s10, t5, 60
+; RV64I-NEXT: and a4, s1, s10
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 60
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 512(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s9, t5, 61
+; RV64I-NEXT: and a4, s1, s9
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 61
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 520(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t2, t5, 62
+; RV64I-NEXT: andi a4, s1, 1
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and t5, a4, s0
+; RV64I-NEXT: slli s0, s0, 62
+; RV64I-NEXT: and a4, s1, t2
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, s0
+; RV64I-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 2
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 1
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 2
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 344(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 8
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 3
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 320(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 16
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 312(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 32
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 5
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 296(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 64
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 6
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 352(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 128
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 7
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 280(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 256
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 8
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 512
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 9
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 304(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 1024
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 10
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 336(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 440(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 11
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 216(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 13
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 288(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 8
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 15
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 16
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 17
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 176(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 64
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 200(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 128
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 19
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 232(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 256
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 20
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 512
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 21
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 272(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1024
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 22
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2048
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4096
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 24
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 160(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 8192
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 25
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 168(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16384
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 26
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 192(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32768
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 27
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 208(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 65536
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 28
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 224(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 131072
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli s1, a1, 29
+; RV64I-NEXT: and a4, a4, s1
+; RV64I-NEXT: sd a4, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 262144
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 30
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sraiw a4, t0, 31
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 31
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 432(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 32
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 424(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 33
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 416(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 34
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 35
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 424(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 400(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 36
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 392(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 37
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 376(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 38
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 368(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 39
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 360(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 40
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 360(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 328(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli s0, a1, 41
+; RV64I-NEXT: and a4, a4, s0
+; RV64I-NEXT: sd a4, 368(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, t0, ra
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 42
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 376(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a3, t0, a3
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: slli a4, a1, 43
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: sd a3, 392(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 44
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 400(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s11
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 45
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s8
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a5, a1, 46
+; RV64I-NEXT: and a2, a2, a5
+; RV64I-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s7
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a4, a1, 47
+; RV64I-NEXT: and s11, a2, a4
+; RV64I-NEXT: and a2, t0, s6
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 48
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s5
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 49
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s4
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 50
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s3
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 51
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, t1
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 52
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, a7
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 53
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, a6
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 54
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, a0
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 55
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 56
+; RV64I-NEXT: and s6, a2, a3
+; RV64I-NEXT: and a3, t0, t6
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: slli a4, a1, 57
+; RV64I-NEXT: and s5, a3, a4
+; RV64I-NEXT: and a4, t0, t4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 58
+; RV64I-NEXT: and s7, a4, a5
+; RV64I-NEXT: and a5, t0, t3
+; RV64I-NEXT: seqz a5, a5
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a0, a1, 59
+; RV64I-NEXT: and s8, a5, a0
+; RV64I-NEXT: and a0, t0, s10
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli s10, a1, 60
+; RV64I-NEXT: and s10, a0, s10
+; RV64I-NEXT: and a0, t0, s9
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli s9, a1, 61
+; RV64I-NEXT: and s9, a0, s9
+; RV64I-NEXT: and a0, t0, t2
+; RV64I-NEXT: andi t0, t0, 1
+; RV64I-NEXT: seqz t0, t0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: and t0, t0, a1
+; RV64I-NEXT: slli a1, a1, 62
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 456(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s4, t5, a0
+; RV64I-NEXT: ld a0, 912(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, a1, a0
+; RV64I-NEXT: ld a0, 896(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, a0, a1
+; RV64I-NEXT: ld a0, 872(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 464(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, a1, a0
+; RV64I-NEXT: ld a0, 848(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, a0, a1
+; RV64I-NEXT: ld a0, 800(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 784(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, a0, s0
+; RV64I-NEXT: ld a0, 736(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 728(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, a0, s1
+; RV64I-NEXT: ld a0, 672(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 656(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s2, a0, a1
+; RV64I-NEXT: ld a0, 600(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 584(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s3, a0, a1
+; RV64I-NEXT: ld a0, 544(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 536(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, a0, a1
+; RV64I-NEXT: ld a0, 488(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 480(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, a0, a1
+; RV64I-NEXT: ld a0, 384(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, a0
+; RV64I-NEXT: ld a0, 344(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 296(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: ld a2, 280(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a3, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a2, a3
+; RV64I-NEXT: ld a3, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a4, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a3, a4
+; RV64I-NEXT: ld a4, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a4, a5
+; RV64I-NEXT: ld a5, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a6, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a5, a6
+; RV64I-NEXT: ld a6, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a7, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a6, a7
+; RV64I-NEXT: ld a7, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld ra, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, ra
+; RV64I-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s11, ra, s11
+; RV64I-NEXT: xor s5, s6, s5
+; RV64I-NEXT: xor t3, s4, t3
+; RV64I-NEXT: ld s4, 928(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t4, s4
+; RV64I-NEXT: ld s4, 904(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t5, s4
+; RV64I-NEXT: ld s4, 864(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, t6, s4
+; RV64I-NEXT: ld s4, 816(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, s0, s4
+; RV64I-NEXT: ld s4, 760(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, s1, s4
+; RV64I-NEXT: ld s4, 696(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s2, s2, s4
+; RV64I-NEXT: ld s4, 632(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s3, s3, s4
+; RV64I-NEXT: ld s4, 552(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, s4
+; RV64I-NEXT: ld s4, 496(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, s4
+; RV64I-NEXT: xor a0, t0, a0
+; RV64I-NEXT: ld t0, 352(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, t0
+; RV64I-NEXT: ld t0, 304(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a2, t0
+; RV64I-NEXT: ld t0, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a3, t0
+; RV64I-NEXT: ld t0, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a4, t0
+; RV64I-NEXT: ld t0, 160(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a5, t0
+; RV64I-NEXT: ld t0, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a6, t0
+; RV64I-NEXT: ld t0, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, t0
+; RV64I-NEXT: ld t0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, s11, t0
+; RV64I-NEXT: xor s4, s5, s7
+; RV64I-NEXT: xor t3, t3, t4
+; RV64I-NEXT: ld t4, 936(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t5, t4
+; RV64I-NEXT: ld t5, 888(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t6, t5
+; RV64I-NEXT: ld t6, 832(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, s0, t6
+; RV64I-NEXT: ld s0, 776(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, s1, s0
+; RV64I-NEXT: ld s1, 712(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, s2, s1
+; RV64I-NEXT: ld s2, 640(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s2, s3, s2
+; RV64I-NEXT: ld s3, 560(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, s3
+; RV64I-NEXT: ld s3, 504(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, s3
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 288(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 232(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: ld a6, 360(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a7, a6
+; RV64I-NEXT: ld a7, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, t0, a7
+; RV64I-NEXT: xor t0, s4, s8
+; RV64I-NEXT: xor t3, t3, t4
+; RV64I-NEXT: ld t4, 920(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t5, t4
+; RV64I-NEXT: ld t5, 856(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t6, t5
+; RV64I-NEXT: ld t6, 792(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, s0, t6
+; RV64I-NEXT: ld s0, 720(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, s1, s0
+; RV64I-NEXT: ld s1, 648(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, s2, s1
+; RV64I-NEXT: ld s2, 568(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, s2
+; RV64I-NEXT: ld s2, 512(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, s2
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 368(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: ld a6, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a7, a6
+; RV64I-NEXT: xor a7, t0, s10
+; RV64I-NEXT: xor t0, t3, t4
+; RV64I-NEXT: ld t3, 880(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t5, t3
+; RV64I-NEXT: ld t4, 808(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t6, t4
+; RV64I-NEXT: ld t5, 744(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, s0, t5
+; RV64I-NEXT: ld t6, 664(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, s1, t6
+; RV64I-NEXT: ld s0, 576(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, s0
+; RV64I-NEXT: ld s0, 520(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, s0
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 416(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 376(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: xor a6, a7, s9
+; RV64I-NEXT: xor a7, t0, t3
+; RV64I-NEXT: ld t0, 824(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t4, t0
+; RV64I-NEXT: ld t3, 752(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t5, t3
+; RV64I-NEXT: ld t4, 680(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t6, t4
+; RV64I-NEXT: ld t5, 592(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, t5
+; RV64I-NEXT: ld t5, 528(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, t5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 424(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 392(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: xor a6, a7, t0
+; RV64I-NEXT: ld a7, 768(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, t3, a7
+; RV64I-NEXT: ld t0, 688(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t4, t0
+; RV64I-NEXT: ld t3, 608(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, t3
+; RV64I-NEXT: ld t3, 944(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, t3
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 400(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 952(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: xor a5, a6, a7
+; RV64I-NEXT: ld a6, 704(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, t0, a6
+; RV64I-NEXT: ld a7, 624(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, t1, a7
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 328(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: xor a3, a5, a6
+; RV64I-NEXT: ld a5, 616(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a7, a5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: xor a3, a3, a5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: xor a1, a3, t2
+; RV64I-NEXT: xor a0, a0, a4
+; RV64I-NEXT: srli a2, a1, 40
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 24
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: srli a6, a0, 8
+; RV64I-NEXT: ld a7, 992(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a5, a5, a7
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: srliw a7, a1, 24
+; RV64I-NEXT: ld t1, 976(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a2, t1
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: lui t0, 4080
+; RV64I-NEXT: and a3, a1, t0
+; RV64I-NEXT: and a4, a4, t0
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: slli a5, a1, 56
+; RV64I-NEXT: and a1, a1, t1
+; RV64I-NEXT: slli a7, a7, 32
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: or a3, a3, a7
+; RV64I-NEXT: srli a7, a0, 40
+; RV64I-NEXT: slli a1, a1, 40
+; RV64I-NEXT: or a1, a5, a1
; RV64I-NEXT: srli a5, a0, 56
-; RV64I-NEXT: and a6, t2, t5
-; RV64I-NEXT: and a7, t3, s11
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: and a7, a7, t1
+; RV64I-NEXT: or a5, a7, a5
+; RV64I-NEXT: srli a7, a0, 24
+; RV64I-NEXT: and a7, a7, t0
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: srliw a7, a0, 24
-; RV64I-NEXT: and t0, t4, s10
-; RV64I-NEXT: or a5, t0, a5
-; RV64I-NEXT: and t0, a0, t5
+; RV64I-NEXT: and t0, a0, t0
; RV64I-NEXT: slli a7, a7, 32
; RV64I-NEXT: slli t0, t0, 24
; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: and t0, a0, t1
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: slli t0, t0, 40
+; RV64I-NEXT: or a0, a0, t0
+; RV64I-NEXT: or a2, a4, a2
; RV64I-NEXT: or a1, a1, a3
-; RV64I-NEXT: slli a3, a2, 56
-; RV64I-NEXT: and a2, a2, s10
-; RV64I-NEXT: slli a2, a2, 40
-; RV64I-NEXT: or a2, a3, a2
; RV64I-NEXT: or a3, a6, a5
-; RV64I-NEXT: slli a5, a0, 56
-; RV64I-NEXT: and a0, a0, s10
-; RV64I-NEXT: slli a0, a0, 40
-; RV64I-NEXT: or a0, a5, a0
-; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: or a0, a0, a7
-; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: or a1, a1, a2
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: srli a2, a1, 4
-; RV64I-NEXT: and a1, a1, s9
+; RV64I-NEXT: ld a4, 984(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a1, a1, a4
; RV64I-NEXT: srli a3, a0, 4
-; RV64I-NEXT: and a0, a0, s9
-; RV64I-NEXT: and a2, a2, s9
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: and a3, a3, a4
; RV64I-NEXT: slli a1, a1, 4
-; RV64I-NEXT: and a3, a3, s9
; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: srli a2, a1, 2
-; RV64I-NEXT: and a1, a1, s8
+; RV64I-NEXT: ld a4, 968(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a1, a1, a4
; RV64I-NEXT: srli a3, a0, 2
-; RV64I-NEXT: and a0, a0, s8
-; RV64I-NEXT: and a2, a2, s8
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: and a3, a3, a4
; RV64I-NEXT: slli a1, a1, 2
-; RV64I-NEXT: and a3, a3, s8
-; RV64I-NEXT: slli a0, a0, 2
; RV64I-NEXT: or a1, a2, a1
-; RV64I-NEXT: or a0, a3, a0
-; RV64I-NEXT: srli a2, a1, 1
-; RV64I-NEXT: and a1, a1, s7
-; RV64I-NEXT: srli a3, a0, 1
-; RV64I-NEXT: and a0, a0, s7
-; RV64I-NEXT: and a2, a2, s7
-; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: and a3, a3, s7
-; RV64I-NEXT: slli a0, a0, 1
-; RV64I-NEXT: or s4, a2, a1
-; RV64I-NEXT: or s0, a3, a0
-; RV64I-NEXT: andi a1, s0, 2
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s0, 1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, a0, s5
-; RV64I-NEXT: andi a1, s0, 4
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s0, 8
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: andi a1, s0, 16
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s0, 32
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: andi a1, s0, 64
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: xor s6, s1, a0
-; RV64I-NEXT: andi a1, s0, 128
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s0, 256
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: andi a1, s0, 512
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: andi a1, s0, 1024
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: li s1, 1
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: slli a1, s1, 11
-; RV64I-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a1, 32
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 64
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 128
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 256
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 512
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: lui a1, 1024
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a1, 2048
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 4096
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 8192
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 16384
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 32768
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 65536
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: lui a1, 131072
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a1, 262144
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 31
-; RV64I-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 32
-; RV64I-NEXT: sd a1, 256(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 33
-; RV64I-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 34
-; RV64I-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 35
-; RV64I-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 36
-; RV64I-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: slli a1, s1, 37
-; RV64I-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: slli a1, s1, 38
-; RV64I-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 39
-; RV64I-NEXT: sd a1, 200(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 40
-; RV64I-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 41
-; RV64I-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 42
-; RV64I-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 43
-; RV64I-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 44
-; RV64I-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 45
-; RV64I-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: slli a1, s1, 46
-; RV64I-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: slli a1, s1, 47
-; RV64I-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 48
-; RV64I-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 49
-; RV64I-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 50
-; RV64I-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 51
-; RV64I-NEXT: sd a1, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 52
-; RV64I-NEXT: sd a1, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 53
-; RV64I-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 54
-; RV64I-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 55
-; RV64I-NEXT: sd a1, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: slli a1, s1, 56
-; RV64I-NEXT: sd a1, 64(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: slli a1, s1, 57
-; RV64I-NEXT: sd a1, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 58
-; RV64I-NEXT: sd a1, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 59
-; RV64I-NEXT: sd a1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 60
-; RV64I-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 61
-; RV64I-NEXT: sd a1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 62
-; RV64I-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: li s1, -1
-; RV64I-NEXT: slli a1, s1, 63
-; RV64I-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: srli a1, s3, 24
-; RV64I-NEXT: srli a2, s3, 8
-; RV64I-NEXT: srli a3, s3, 40
-; RV64I-NEXT: srli a4, s3, 56
-; RV64I-NEXT: lui t0, 4080
-; RV64I-NEXT: and a5, s3, t0
-; RV64I-NEXT: srliw a6, s3, 24
-; RV64I-NEXT: xor a0, s6, a0
-; RV64I-NEXT: and a7, s3, s10
-; RV64I-NEXT: slli s3, s3, 56
-; RV64I-NEXT: and a1, a1, t0
-; RV64I-NEXT: lui s1, 4080
-; RV64I-NEXT: and a2, a2, s11
-; RV64I-NEXT: or a1, a2, a1
-; RV64I-NEXT: srli a2, s2, 24
-; RV64I-NEXT: and a3, a3, s10
-; RV64I-NEXT: or a3, a3, a4
-; RV64I-NEXT: srli a4, s2, 8
-; RV64I-NEXT: slli a5, a5, 24
-; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: srli a6, s2, 40
-; RV64I-NEXT: slli a7, a7, 40
-; RV64I-NEXT: or a7, s3, a7
-; RV64I-NEXT: srli t0, s2, 56
-; RV64I-NEXT: and a2, a2, s1
-; RV64I-NEXT: and a4, a4, s11
-; RV64I-NEXT: or a2, a4, a2
-; RV64I-NEXT: and a4, s2, s1
-; RV64I-NEXT: and a6, a6, s10
-; RV64I-NEXT: or a6, a6, t0
-; RV64I-NEXT: srliw t0, s2, 24
-; RV64I-NEXT: slli a4, a4, 24
-; RV64I-NEXT: slli t0, t0, 32
-; RV64I-NEXT: or a4, a4, t0
-; RV64I-NEXT: and t0, s2, s10
-; RV64I-NEXT: slli s2, s2, 56
-; RV64I-NEXT: slli t0, t0, 40
-; RV64I-NEXT: or t0, s2, t0
-; RV64I-NEXT: or a1, a1, a3
-; RV64I-NEXT: srli a3, a0, 40
-; RV64I-NEXT: or a5, a7, a5
-; RV64I-NEXT: srli a7, a0, 56
-; RV64I-NEXT: or a2, a2, a6
-; RV64I-NEXT: srli a6, a0, 24
-; RV64I-NEXT: or a4, t0, a4
-; RV64I-NEXT: srli t0, a0, 8
-; RV64I-NEXT: and a3, a3, s10
-; RV64I-NEXT: or a3, a3, a7
-; RV64I-NEXT: srliw a7, a0, 24
-; RV64I-NEXT: and a6, a6, s1
-; RV64I-NEXT: and t0, t0, s11
-; RV64I-NEXT: or a6, t0, a6
-; RV64I-NEXT: and t0, a0, s1
-; RV64I-NEXT: slli a7, a7, 32
-; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a7, t0, a7
-; RV64I-NEXT: slli t0, a0, 56
-; RV64I-NEXT: and a0, a0, s10
-; RV64I-NEXT: slli a0, a0, 40
-; RV64I-NEXT: or a0, t0, a0
-; RV64I-NEXT: or a1, a5, a1
-; RV64I-NEXT: or a2, a4, a2
-; RV64I-NEXT: lui a4, %hi(.LCPI6_0)
-; RV64I-NEXT: ld s4, %lo(.LCPI6_0)(a4)
-; RV64I-NEXT: or a3, a6, a3
-; RV64I-NEXT: or a0, a0, a7
-; RV64I-NEXT: srli a4, a1, 4
-; RV64I-NEXT: and a1, a1, s9
-; RV64I-NEXT: srli a5, a2, 4
-; RV64I-NEXT: and a2, a2, s9
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: and a3, a4, s9
-; RV64I-NEXT: slli a1, a1, 4
-; RV64I-NEXT: and a4, a5, s9
-; RV64I-NEXT: slli a2, a2, 4
-; RV64I-NEXT: srli a5, a0, 4
-; RV64I-NEXT: and a0, a0, s9
-; RV64I-NEXT: or a1, a3, a1
-; RV64I-NEXT: or a2, a4, a2
-; RV64I-NEXT: and a3, a5, s9
-; RV64I-NEXT: slli a0, a0, 4
-; RV64I-NEXT: srli a4, a1, 2
-; RV64I-NEXT: and a1, a1, s8
-; RV64I-NEXT: srli a5, a2, 2
-; RV64I-NEXT: and a2, a2, s8
-; RV64I-NEXT: or a0, a3, a0
-; RV64I-NEXT: and a3, a4, s8
-; RV64I-NEXT: slli a1, a1, 2
-; RV64I-NEXT: and a4, a5, s8
-; RV64I-NEXT: slli a2, a2, 2
-; RV64I-NEXT: srli a5, a0, 2
-; RV64I-NEXT: and a0, a0, s8
-; RV64I-NEXT: or a1, a3, a1
-; RV64I-NEXT: or a2, a4, a2
-; RV64I-NEXT: and a3, a5, s8
+; RV64I-NEXT: lui a2, %hi(.LCPI6_0)
+; RV64I-NEXT: ld a2, %lo(.LCPI6_0)(a2)
; RV64I-NEXT: slli a0, a0, 2
-; RV64I-NEXT: srli a4, a1, 1
-; RV64I-NEXT: and a1, a1, s7
-; RV64I-NEXT: srli a5, a2, 1
-; RV64I-NEXT: and a2, a2, s7
; RV64I-NEXT: or a0, a3, a0
-; RV64I-NEXT: and a3, a4, s7
+; RV64I-NEXT: srli a3, a1, 1
+; RV64I-NEXT: ld a4, 960(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a1, a1, a4
+; RV64I-NEXT: and a4, a0, a4
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: and a3, a3, a2
+; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: and a4, a5, s7
-; RV64I-NEXT: slli a2, a2, 1
-; RV64I-NEXT: srli a5, a0, 1
-; RV64I-NEXT: and a0, a0, s7
-; RV64I-NEXT: or s2, a3, a1
-; RV64I-NEXT: or s0, a4, a2
-; RV64I-NEXT: and a1, a5, s4
-; RV64I-NEXT: slli a0, a0, 1
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: srli s5, a0, 1
-; RV64I-NEXT: andi a1, s0, 2
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: andi a1, s0, 1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s6, a0, s3
-; RV64I-NEXT: andi a1, s0, 4
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: andi a1, s0, 8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: andi a1, s0, 16
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: andi a1, s0, 32
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s0, 64
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: andi a1, s0, 128
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: andi a1, s0, 256
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s0, 512
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s0, 1024
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: lui a1, 32
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 64
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 128
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 256
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 512
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: lui a1, 1024
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: lui a1, 2048
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 4096
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 8192
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 16384
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 32768
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 65536
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: lui a1, 131072
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: lui a1, 262144
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 200(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: ld a1, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor a0, s6, a0
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: srli a2, a0, 56
-; RV64I-NEXT: srli a3, a0, 8
-; RV64I-NEXT: and a3, a3, s11
-; RV64I-NEXT: srli a4, a0, 24
-; RV64I-NEXT: and a1, a1, s10
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: srliw a2, a0, 24
-; RV64I-NEXT: and a4, a4, s1
-; RV64I-NEXT: and a5, a0, s1
-; RV64I-NEXT: and a6, a0, s10
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: slli a5, a5, 24
-; RV64I-NEXT: slli a6, a6, 40
-; RV64I-NEXT: or a3, a3, a4
-; RV64I-NEXT: or a2, a5, a2
-; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: or a1, a3, a1
-; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: and a0, a0, s9
-; RV64I-NEXT: and a1, a1, s9
-; RV64I-NEXT: slli a0, a0, 4
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 2
-; RV64I-NEXT: and a0, a0, s8
-; RV64I-NEXT: and a1, a1, s8
-; RV64I-NEXT: slli a0, a0, 2
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: and a0, a0, s7
-; RV64I-NEXT: and a1, a1, s4
-; RV64I-NEXT: slli a0, a0, 1
-; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: slli a4, a4, 1
+; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: srli a1, a1, 1
; RV64I-NEXT: srli a0, a0, 1
-; RV64I-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sd s5, 0(a1)
-; RV64I-NEXT: sd a0, 8(a1)
-; RV64I-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sd s5, 0(a1)
-; RV64I-NEXT: sd a0, 8(a1)
-; RV64I-NEXT: ld ra, 392(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 384(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 376(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 368(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 360(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 352(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 344(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 336(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 328(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 320(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 312(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s10, 304(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s11, 296(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 400
+; RV64I-NEXT: ld a2, 1000(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sd a1, 0(a2)
+; RV64I-NEXT: sd a0, 8(a2)
+; RV64I-NEXT: ld a2, 1008(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sd a1, 0(a2)
+; RV64I-NEXT: sd a0, 8(a2)
+; RV64I-NEXT: ld ra, 1112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 1104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 1096(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 1088(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 1080(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 1072(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 1064(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 1056(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 1048(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 1040(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 1032(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 1024(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 1016(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 1120
; RV64I-NEXT: ret
;
; RV32IM-LABEL: commutative_clmulh_v2i64:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -752
-; RV32IM-NEXT: sw ra, 748(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s0, 744(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s1, 740(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s2, 736(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s3, 732(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s4, 728(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s5, 724(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s6, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s7, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s8, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s9, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s10, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s11, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw a2, 640(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi sp, sp, -704
+; RV32IM-NEXT: sw ra, 700(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s0, 696(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 692(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s2, 688(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s3, 684(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s4, 680(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s5, 676(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s6, 672(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s7, 668(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s8, 664(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s9, 660(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s10, 656(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s11, 652(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a3, 636(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a2, 632(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a2, 0(a0)
; RV32IM-NEXT: lw s6, 4(a0)
-; RV32IM-NEXT: lw t0, 8(a0)
-; RV32IM-NEXT: lw t5, 12(a0)
-; RV32IM-NEXT: lw a5, 0(a1)
+; RV32IM-NEXT: lw t2, 8(a0)
+; RV32IM-NEXT: lw t3, 12(a0)
+; RV32IM-NEXT: lw t1, 0(a1)
; RV32IM-NEXT: lw s2, 4(a1)
-; RV32IM-NEXT: lw t2, 8(a1)
-; RV32IM-NEXT: lw t6, 12(a1)
-; RV32IM-NEXT: lui a0, 16
-; RV32IM-NEXT: lui t1, 61681
-; RV32IM-NEXT: lui t3, 209715
-; RV32IM-NEXT: lui a7, 349525
-; RV32IM-NEXT: li s9, 1
-; RV32IM-NEXT: addi s10, a0, -256
+; RV32IM-NEXT: lw a5, 8(a1)
+; RV32IM-NEXT: lw t5, 12(a1)
+; RV32IM-NEXT: lui a3, 16
+; RV32IM-NEXT: lui a0, 61681
+; RV32IM-NEXT: lui a1, 209715
+; RV32IM-NEXT: lui s7, 349525
+; RV32IM-NEXT: li t0, 1
+; RV32IM-NEXT: addi s11, a3, -256
+; RV32IM-NEXT: addi s10, a0, -241
+; RV32IM-NEXT: addi s8, a1, 819
; RV32IM-NEXT: srli t4, s2, 8
; RV32IM-NEXT: srli s3, s2, 24
-; RV32IM-NEXT: and s0, s2, s10
+; RV32IM-NEXT: and a0, s2, s11
; RV32IM-NEXT: slli s2, s2, 24
-; RV32IM-NEXT: srli a6, s6, 8
-; RV32IM-NEXT: srli a3, s6, 24
-; RV32IM-NEXT: and s1, s6, s10
+; RV32IM-NEXT: srli a1, s6, 8
+; RV32IM-NEXT: srli a6, s6, 24
+; RV32IM-NEXT: and s5, s6, s11
; RV32IM-NEXT: slli s6, s6, 24
-; RV32IM-NEXT: srli a0, a5, 8
-; RV32IM-NEXT: srli a4, a5, 24
-; RV32IM-NEXT: and a1, a5, s10
-; RV32IM-NEXT: slli a5, a5, 24
-; RV32IM-NEXT: and t4, t4, s10
+; RV32IM-NEXT: srli a3, t1, 8
+; RV32IM-NEXT: srli a7, t1, 24
+; RV32IM-NEXT: and a4, t1, s11
+; RV32IM-NEXT: slli t1, t1, 24
+; RV32IM-NEXT: and t4, t4, s11
; RV32IM-NEXT: or t4, t4, s3
; RV32IM-NEXT: srli s3, a2, 8
-; RV32IM-NEXT: slli s0, s0, 8
-; RV32IM-NEXT: or s0, s2, s0
+; RV32IM-NEXT: slli a0, a0, 8
+; RV32IM-NEXT: or a0, s2, a0
; RV32IM-NEXT: srli s4, a2, 24
-; RV32IM-NEXT: and a6, a6, s10
-; RV32IM-NEXT: or s2, a6, a3
-; RV32IM-NEXT: and a6, a2, s10
-; RV32IM-NEXT: slli a3, a2, 24
-; RV32IM-NEXT: slli s1, s1, 8
-; RV32IM-NEXT: or s1, s6, s1
-; RV32IM-NEXT: srli s5, t6, 8
-; RV32IM-NEXT: and a0, a0, s10
-; RV32IM-NEXT: or a0, a0, a4
-; RV32IM-NEXT: srli s7, t6, 24
-; RV32IM-NEXT: slli a1, a1, 8
-; RV32IM-NEXT: or a1, a5, a1
-; RV32IM-NEXT: and s6, t6, s10
-; RV32IM-NEXT: slli s8, t6, 24
-; RV32IM-NEXT: and a2, s3, s10
-; RV32IM-NEXT: or a2, a2, s4
+; RV32IM-NEXT: and a1, a1, s11
+; RV32IM-NEXT: or a1, a1, a6
+; RV32IM-NEXT: and a6, a2, s11
+; RV32IM-NEXT: slli s2, a2, 24
+; RV32IM-NEXT: slli s5, s5, 8
+; RV32IM-NEXT: or a2, s6, s5
; RV32IM-NEXT: srli t6, t5, 8
-; RV32IM-NEXT: slli a4, a6, 8
-; RV32IM-NEXT: or a4, a3, a4
-; RV32IM-NEXT: srli a3, t5, 24
-; RV32IM-NEXT: and a5, s5, s10
-; RV32IM-NEXT: or a5, a5, s7
-; RV32IM-NEXT: and s3, t5, s10
-; RV32IM-NEXT: slli t5, t5, 24
-; RV32IM-NEXT: slli s6, s6, 8
-; RV32IM-NEXT: or a6, s8, s6
-; RV32IM-NEXT: srli s4, t2, 8
-; RV32IM-NEXT: and t6, t6, s10
-; RV32IM-NEXT: or a3, t6, a3
-; RV32IM-NEXT: srli t6, t2, 24
-; RV32IM-NEXT: slli s3, s3, 8
+; RV32IM-NEXT: and a3, a3, s11
+; RV32IM-NEXT: or a3, a3, a7
+; RV32IM-NEXT: srli s1, t5, 24
+; RV32IM-NEXT: slli a4, a4, 8
+; RV32IM-NEXT: or a4, t1, a4
+; RV32IM-NEXT: and s0, t5, s11
+; RV32IM-NEXT: slli s5, t5, 24
+; RV32IM-NEXT: and a7, s3, s11
+; RV32IM-NEXT: or a7, a7, s4
+; RV32IM-NEXT: srli t5, t3, 8
+; RV32IM-NEXT: slli a6, a6, 8
+; RV32IM-NEXT: or t1, s2, a6
+; RV32IM-NEXT: srli s3, t3, 24
+; RV32IM-NEXT: and a6, t6, s11
+; RV32IM-NEXT: or s2, a6, s1
+; RV32IM-NEXT: and s4, t3, s11
+; RV32IM-NEXT: slli t3, t3, 24
+; RV32IM-NEXT: slli s0, s0, 8
+; RV32IM-NEXT: or a6, s5, s0
+; RV32IM-NEXT: srli t6, a5, 8
+; RV32IM-NEXT: and t5, t5, s11
; RV32IM-NEXT: or t5, t5, s3
-; RV32IM-NEXT: and s3, t2, s10
-; RV32IM-NEXT: slli t2, t2, 24
-; RV32IM-NEXT: and s4, s4, s10
-; RV32IM-NEXT: or t6, s4, t6
-; RV32IM-NEXT: srli s4, t0, 8
+; RV32IM-NEXT: srli s0, a5, 24
+; RV32IM-NEXT: slli s4, s4, 8
+; RV32IM-NEXT: or t3, t3, s4
+; RV32IM-NEXT: and s3, a5, s11
+; RV32IM-NEXT: slli s4, a5, 24
+; RV32IM-NEXT: and a5, t6, s11
+; RV32IM-NEXT: or a5, a5, s0
+; RV32IM-NEXT: srli t6, t2, 8
; RV32IM-NEXT: slli s3, s3, 8
-; RV32IM-NEXT: or s3, t2, s3
-; RV32IM-NEXT: srli t2, t0, 24
-; RV32IM-NEXT: and s4, s4, s10
-; RV32IM-NEXT: or s4, s4, t2
-; RV32IM-NEXT: and t2, t0, s10
-; RV32IM-NEXT: sw s10, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli t0, t0, 24
-; RV32IM-NEXT: slli t2, t2, 8
-; RV32IM-NEXT: or s5, t0, t2
-; RV32IM-NEXT: or t4, s0, t4
-; RV32IM-NEXT: or s0, s1, s2
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: or a2, a4, a2
-; RV32IM-NEXT: or a1, a6, a5
-; RV32IM-NEXT: addi t1, t1, -241
-; RV32IM-NEXT: sw t1, 684(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: addi t0, t3, 819
-; RV32IM-NEXT: or a3, t5, a3
-; RV32IM-NEXT: addi a7, a7, 1365
-; RV32IM-NEXT: or a4, s3, t6
-; RV32IM-NEXT: or a5, s5, s4
-; RV32IM-NEXT: srli a6, t4, 4
-; RV32IM-NEXT: and t3, t4, t1
-; RV32IM-NEXT: srli t4, s0, 4
-; RV32IM-NEXT: and t5, s0, t1
-; RV32IM-NEXT: and a6, a6, t1
-; RV32IM-NEXT: slli t3, t3, 4
-; RV32IM-NEXT: or a6, a6, t3
-; RV32IM-NEXT: srli t3, a0, 4
-; RV32IM-NEXT: and a0, a0, t1
-; RV32IM-NEXT: and t4, t4, t1
-; RV32IM-NEXT: slli t5, t5, 4
-; RV32IM-NEXT: or t4, t4, t5
-; RV32IM-NEXT: srli t5, a2, 4
-; RV32IM-NEXT: and a2, a2, t1
-; RV32IM-NEXT: and t3, t3, t1
+; RV32IM-NEXT: or s0, s4, s3
+; RV32IM-NEXT: srli s1, t2, 24
+; RV32IM-NEXT: and t6, t6, s11
+; RV32IM-NEXT: or t6, t6, s1
+; RV32IM-NEXT: and s1, t2, s11
+; RV32IM-NEXT: slli t2, t2, 24
+; RV32IM-NEXT: slli s1, s1, 8
+; RV32IM-NEXT: or s1, t2, s1
+; RV32IM-NEXT: addi t2, s7, 1365
+; RV32IM-NEXT: slli s3, t0, 11
+; RV32IM-NEXT: or a0, a0, t4
+; RV32IM-NEXT: or a1, a2, a1
+; RV32IM-NEXT: or a3, a4, a3
+; RV32IM-NEXT: or a2, t1, a7
+; RV32IM-NEXT: or a4, a6, s2
+; RV32IM-NEXT: or a6, t3, t5
+; RV32IM-NEXT: or a5, s0, a5
+; RV32IM-NEXT: or a7, s1, t6
+; RV32IM-NEXT: srli t0, a0, 4
+; RV32IM-NEXT: sw s10, 648(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, s10
+; RV32IM-NEXT: srli t1, a1, 4
+; RV32IM-NEXT: and a1, a1, s10
+; RV32IM-NEXT: srli t3, a3, 4
+; RV32IM-NEXT: and a3, a3, s10
+; RV32IM-NEXT: srli t4, a2, 4
+; RV32IM-NEXT: and a2, a2, s10
+; RV32IM-NEXT: srli t5, a4, 4
+; RV32IM-NEXT: and a4, a4, s10
+; RV32IM-NEXT: srli t6, a6, 4
+; RV32IM-NEXT: and a6, a6, s10
+; RV32IM-NEXT: srli s0, a5, 4
+; RV32IM-NEXT: and a5, a5, s10
+; RV32IM-NEXT: srli s1, a7, 4
+; RV32IM-NEXT: and a7, a7, s10
+; RV32IM-NEXT: and t0, t0, s10
; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: or a0, t3, a0
-; RV32IM-NEXT: srli t3, a1, 4
-; RV32IM-NEXT: and a1, a1, t1
-; RV32IM-NEXT: and t5, t5, t1
-; RV32IM-NEXT: slli a2, a2, 4
-; RV32IM-NEXT: or a2, t5, a2
-; RV32IM-NEXT: srli t5, a3, 4
-; RV32IM-NEXT: and a3, a3, t1
-; RV32IM-NEXT: and t3, t3, t1
+; RV32IM-NEXT: and t1, t1, s10
; RV32IM-NEXT: slli a1, a1, 4
-; RV32IM-NEXT: or a1, t3, a1
-; RV32IM-NEXT: srli t3, a4, 4
-; RV32IM-NEXT: and a4, a4, t1
-; RV32IM-NEXT: and t5, t5, t1
+; RV32IM-NEXT: and t3, t3, s10
; RV32IM-NEXT: slli a3, a3, 4
-; RV32IM-NEXT: or a3, t5, a3
-; RV32IM-NEXT: srli t5, a5, 4
-; RV32IM-NEXT: and a5, a5, t1
-; RV32IM-NEXT: and t3, t3, t1
+; RV32IM-NEXT: and t4, t4, s10
+; RV32IM-NEXT: slli a2, a2, 4
+; RV32IM-NEXT: and t5, t5, s10
; RV32IM-NEXT: slli a4, a4, 4
-; RV32IM-NEXT: and t5, t5, t1
+; RV32IM-NEXT: and t6, t6, s10
+; RV32IM-NEXT: slli a6, a6, 4
+; RV32IM-NEXT: and s0, s0, s10
; RV32IM-NEXT: slli a5, a5, 4
-; RV32IM-NEXT: or a4, t3, a4
-; RV32IM-NEXT: or a5, t5, a5
-; RV32IM-NEXT: srli t3, a6, 2
-; RV32IM-NEXT: sw t0, 688(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a6, a6, t0
-; RV32IM-NEXT: srli t5, t4, 2
-; RV32IM-NEXT: and t4, t4, t0
-; RV32IM-NEXT: and t3, t3, t0
-; RV32IM-NEXT: slli a6, a6, 2
-; RV32IM-NEXT: or a6, t3, a6
-; RV32IM-NEXT: srli t3, a0, 2
-; RV32IM-NEXT: and a0, a0, t0
-; RV32IM-NEXT: and t5, t5, t0
-; RV32IM-NEXT: slli t4, t4, 2
-; RV32IM-NEXT: or t4, t5, t4
-; RV32IM-NEXT: srli t5, a2, 2
-; RV32IM-NEXT: and a2, a2, t0
-; RV32IM-NEXT: and t3, t3, t0
+; RV32IM-NEXT: and s1, s1, s10
+; RV32IM-NEXT: slli a7, a7, 4
+; RV32IM-NEXT: or a0, t0, a0
+; RV32IM-NEXT: or a1, t1, a1
+; RV32IM-NEXT: or a3, t3, a3
+; RV32IM-NEXT: or a2, t4, a2
+; RV32IM-NEXT: or a4, t5, a4
+; RV32IM-NEXT: or a6, t6, a6
+; RV32IM-NEXT: or a5, s0, a5
+; RV32IM-NEXT: or a7, s1, a7
+; RV32IM-NEXT: srli t0, a0, 2
+; RV32IM-NEXT: sw s8, 628(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, s8
+; RV32IM-NEXT: srli t1, a1, 2
+; RV32IM-NEXT: and a1, a1, s8
+; RV32IM-NEXT: srli t3, a3, 2
+; RV32IM-NEXT: and a3, a3, s8
+; RV32IM-NEXT: srli t4, a2, 2
+; RV32IM-NEXT: and a2, a2, s8
+; RV32IM-NEXT: srli t5, a4, 2
+; RV32IM-NEXT: and a4, a4, s8
+; RV32IM-NEXT: srli t6, a6, 2
+; RV32IM-NEXT: and a6, a6, s8
+; RV32IM-NEXT: srli s0, a5, 2
+; RV32IM-NEXT: and a5, a5, s8
+; RV32IM-NEXT: srli s1, a7, 2
+; RV32IM-NEXT: and a7, a7, s8
+; RV32IM-NEXT: and t0, t0, s8
; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: or a0, t3, a0
-; RV32IM-NEXT: srli t3, a1, 2
-; RV32IM-NEXT: and a1, a1, t0
-; RV32IM-NEXT: and t5, t5, t0
-; RV32IM-NEXT: slli a2, a2, 2
-; RV32IM-NEXT: or a2, t5, a2
-; RV32IM-NEXT: srli t5, a3, 2
-; RV32IM-NEXT: and a3, a3, t0
-; RV32IM-NEXT: and t3, t3, t0
+; RV32IM-NEXT: and t1, t1, s8
; RV32IM-NEXT: slli a1, a1, 2
-; RV32IM-NEXT: or a1, t3, a1
-; RV32IM-NEXT: srli t3, a4, 2
-; RV32IM-NEXT: and a4, a4, t0
-; RV32IM-NEXT: and t5, t5, t0
+; RV32IM-NEXT: and t3, t3, s8
; RV32IM-NEXT: slli a3, a3, 2
-; RV32IM-NEXT: or t5, t5, a3
-; RV32IM-NEXT: srli a3, a5, 2
-; RV32IM-NEXT: and a5, a5, t0
-; RV32IM-NEXT: and t3, t3, t0
+; RV32IM-NEXT: and t4, t4, s8
+; RV32IM-NEXT: slli a2, a2, 2
+; RV32IM-NEXT: and t5, t5, s8
; RV32IM-NEXT: slli a4, a4, 2
-; RV32IM-NEXT: and a3, a3, t0
+; RV32IM-NEXT: and t6, t6, s8
+; RV32IM-NEXT: slli a6, a6, 2
+; RV32IM-NEXT: and s0, s0, s8
; RV32IM-NEXT: slli a5, a5, 2
-; RV32IM-NEXT: or a4, t3, a4
-; RV32IM-NEXT: or t6, a3, a5
-; RV32IM-NEXT: srli a3, a6, 1
-; RV32IM-NEXT: mv t0, a7
-; RV32IM-NEXT: sw a7, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a5, a6, a7
-; RV32IM-NEXT: srli t3, t4, 1
-; RV32IM-NEXT: and t4, t4, a7
-; RV32IM-NEXT: and a3, a3, a7
-; RV32IM-NEXT: slli a5, a5, 1
-; RV32IM-NEXT: or a6, a3, a5
-; RV32IM-NEXT: srli a3, a0, 1
-; RV32IM-NEXT: and a0, a0, a7
-; RV32IM-NEXT: and a5, t3, a7
-; RV32IM-NEXT: slli t4, t4, 1
-; RV32IM-NEXT: or a5, a5, t4
-; RV32IM-NEXT: srli t3, a2, 1
-; RV32IM-NEXT: and a2, a2, a7
-; RV32IM-NEXT: and a3, a3, a7
+; RV32IM-NEXT: and s1, s1, s8
+; RV32IM-NEXT: slli a7, a7, 2
+; RV32IM-NEXT: or a0, t0, a0
+; RV32IM-NEXT: or a1, t1, a1
+; RV32IM-NEXT: or a3, t3, a3
+; RV32IM-NEXT: or a2, t4, a2
+; RV32IM-NEXT: or a4, t5, a4
+; RV32IM-NEXT: or a6, t6, a6
+; RV32IM-NEXT: or a5, s0, a5
+; RV32IM-NEXT: or a7, s1, a7
+; RV32IM-NEXT: srli t0, a0, 1
+; RV32IM-NEXT: sw t2, 644(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, t2
+; RV32IM-NEXT: srli t1, a1, 1
+; RV32IM-NEXT: and a1, a1, t2
+; RV32IM-NEXT: srli t3, a3, 1
+; RV32IM-NEXT: and a3, a3, t2
+; RV32IM-NEXT: srli t4, a2, 1
+; RV32IM-NEXT: and a2, a2, t2
+; RV32IM-NEXT: srli t5, a4, 1
+; RV32IM-NEXT: and a4, a4, t2
+; RV32IM-NEXT: srli t6, a6, 1
+; RV32IM-NEXT: and a6, a6, t2
+; RV32IM-NEXT: srli s0, a5, 1
+; RV32IM-NEXT: and a5, a5, t2
+; RV32IM-NEXT: srli s1, a7, 1
+; RV32IM-NEXT: and a7, a7, t2
+; RV32IM-NEXT: and t0, t0, t2
; RV32IM-NEXT: slli a0, a0, 1
-; RV32IM-NEXT: or a3, a3, a0
-; RV32IM-NEXT: srli a0, a1, 1
-; RV32IM-NEXT: and a1, a1, a7
-; RV32IM-NEXT: and t3, t3, a7
-; RV32IM-NEXT: slli a2, a2, 1
-; RV32IM-NEXT: or t3, t3, a2
-; RV32IM-NEXT: srli a2, t5, 1
-; RV32IM-NEXT: and t4, t5, a7
-; RV32IM-NEXT: and a0, a0, a7
+; RV32IM-NEXT: and t1, t1, t2
; RV32IM-NEXT: slli a1, a1, 1
-; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: srli a1, a4, 1
-; RV32IM-NEXT: and a4, a4, a7
-; RV32IM-NEXT: and a2, a2, a7
-; RV32IM-NEXT: slli t4, t4, 1
-; RV32IM-NEXT: or a7, a2, t4
-; RV32IM-NEXT: srli t4, t6, 1
-; RV32IM-NEXT: and t5, t6, t0
-; RV32IM-NEXT: and a1, a1, t0
+; RV32IM-NEXT: and t3, t3, t2
+; RV32IM-NEXT: slli a3, a3, 1
+; RV32IM-NEXT: and t4, t4, t2
+; RV32IM-NEXT: slli s2, a2, 1
+; RV32IM-NEXT: and t5, t5, t2
; RV32IM-NEXT: slli a4, a4, 1
-; RV32IM-NEXT: and t4, t4, t0
-; RV32IM-NEXT: slli t5, t5, 1
-; RV32IM-NEXT: or a1, a1, a4
-; RV32IM-NEXT: or a4, t4, t5
-; RV32IM-NEXT: srli t4, a6, 8
-; RV32IM-NEXT: srli t5, a6, 24
-; RV32IM-NEXT: and t4, t4, s10
-; RV32IM-NEXT: or a2, t4, t5
-; RV32IM-NEXT: sw a2, 620(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s4, 1024
-; RV32IM-NEXT: slli s5, s9, 11
-; RV32IM-NEXT: lui s8, 2048
-; RV32IM-NEXT: lui s10, 4096
-; RV32IM-NEXT: lui t6, 8192
-; RV32IM-NEXT: lui s0, 16384
-; RV32IM-NEXT: lui s1, 32768
-; RV32IM-NEXT: lui t2, 65536
-; RV32IM-NEXT: lui ra, 131072
-; RV32IM-NEXT: lui t4, 262144
-; RV32IM-NEXT: lui t5, 524288
-; RV32IM-NEXT: andi a2, t3, 2
-; RV32IM-NEXT: sw a2, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, t3, 1
-; RV32IM-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, t3, 4
-; RV32IM-NEXT: sw a2, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, t3, 8
-; RV32IM-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, t3, 16
-; RV32IM-NEXT: sw a2, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, t3, 32
-; RV32IM-NEXT: sw a2, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, t3, 64
-; RV32IM-NEXT: sw a2, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, t3, 128
-; RV32IM-NEXT: sw a2, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, t3, 256
-; RV32IM-NEXT: sw a2, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, t3, 512
-; RV32IM-NEXT: sw a2, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, t3, 1024
-; RV32IM-NEXT: sw a2, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a2, t3, s5
-; RV32IM-NEXT: sw a2, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s5, 680(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t1, 1
-; RV32IM-NEXT: and a2, t3, t1
-; RV32IM-NEXT: sw a2, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t0, 2
-; RV32IM-NEXT: and a2, t3, t0
-; RV32IM-NEXT: sw a2, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s2, 4
-; RV32IM-NEXT: and a2, t3, s2
-; RV32IM-NEXT: sw a2, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 8
-; RV32IM-NEXT: and a2, t3, a2
-; RV32IM-NEXT: sw a2, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 16
-; RV32IM-NEXT: and a2, t3, a2
-; RV32IM-NEXT: sw a2, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 32
-; RV32IM-NEXT: and a2, t3, a2
-; RV32IM-NEXT: sw a2, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s11, 64
-; RV32IM-NEXT: and a2, t3, s11
-; RV32IM-NEXT: sw a2, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 128
-; RV32IM-NEXT: and s3, t3, a2
-; RV32IM-NEXT: sw s3, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s6, 256
-; RV32IM-NEXT: and s3, t3, s6
-; RV32IM-NEXT: sw s3, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s7, 512
-; RV32IM-NEXT: and s3, t3, s7
-; RV32IM-NEXT: sw s3, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, t3, s4
-; RV32IM-NEXT: sw s3, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, t3, s8
-; RV32IM-NEXT: sw s3, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, t3, s10
-; RV32IM-NEXT: sw s3, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t6, t3, t6
-; RV32IM-NEXT: sw t6, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t6, t3, s0
-; RV32IM-NEXT: sw t6, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t6, t3, s1
-; RV32IM-NEXT: sw t6, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t2, t3, t2
-; RV32IM-NEXT: sw t2, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t2, t3, ra
-; RV32IM-NEXT: sw t2, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t2, t3, t4
-; RV32IM-NEXT: sw t2, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t2, t3, t5
-; RV32IM-NEXT: sw t2, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s6, a5, 2
-; RV32IM-NEXT: andi t2, a5, 1
-; RV32IM-NEXT: sw t2, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s0, a5, 4
-; RV32IM-NEXT: sw s0, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi s7, a5, 8
-; RV32IM-NEXT: andi s8, a5, 16
-; RV32IM-NEXT: andi ra, a5, 32
-; RV32IM-NEXT: andi s10, a5, 64
-; RV32IM-NEXT: sw s10, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t2, a5, 128
-; RV32IM-NEXT: sw t2, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t4, a5, 256
-; RV32IM-NEXT: andi t5, a5, 512
-; RV32IM-NEXT: sw t5, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t6, a5, 1024
-; RV32IM-NEXT: sw t6, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s9, a5, s5
-; RV32IM-NEXT: sw s9, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t2, a5, t1
-; RV32IM-NEXT: sw t2, 288(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s1, a5, t0
-; RV32IM-NEXT: sw s1, 284(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a5, s2
-; RV32IM-NEXT: sw s3, 276(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t0, 8
-; RV32IM-NEXT: and s4, a5, t0
-; RV32IM-NEXT: sw s4, 272(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t0, 16
-; RV32IM-NEXT: and t0, a5, t0
-; RV32IM-NEXT: sw t0, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t0, 32
-; RV32IM-NEXT: and s2, a5, t0
-; RV32IM-NEXT: sw s2, 280(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s11, a5, s11
-; RV32IM-NEXT: sw s11, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t0, a5, a2
-; RV32IM-NEXT: sw t0, 296(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 256
-; RV32IM-NEXT: and t1, a5, a2
-; RV32IM-NEXT: sw t1, 292(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 512
-; RV32IM-NEXT: and t3, a5, a2
-; RV32IM-NEXT: sw t3, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 1024
-; RV32IM-NEXT: and s5, a5, a2
-; RV32IM-NEXT: lui a2, 2048
-; RV32IM-NEXT: and a2, a5, a2
-; RV32IM-NEXT: sw a2, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 4096
-; RV32IM-NEXT: and a2, a5, a2
-; RV32IM-NEXT: sw a2, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 8192
-; RV32IM-NEXT: and a2, a5, a2
-; RV32IM-NEXT: sw a2, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 16384
-; RV32IM-NEXT: and a2, a5, a2
-; RV32IM-NEXT: sw a2, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 32768
-; RV32IM-NEXT: and a2, a5, a2
-; RV32IM-NEXT: sw a2, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 65536
-; RV32IM-NEXT: and a2, a5, a2
-; RV32IM-NEXT: sw a2, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 131072
-; RV32IM-NEXT: and a2, a5, a2
-; RV32IM-NEXT: sw a2, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 262144
-; RV32IM-NEXT: and a2, a5, a2
-; RV32IM-NEXT: sw a2, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a2, 524288
-; RV32IM-NEXT: and a2, a5, a2
-; RV32IM-NEXT: sw a2, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s6
-; RV32IM-NEXT: sw a2, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, a2
-; RV32IM-NEXT: sw a2, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s0
-; RV32IM-NEXT: sw a2, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s7
-; RV32IM-NEXT: sw a2, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s8
-; RV32IM-NEXT: sw a2, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, ra
-; RV32IM-NEXT: sw a2, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s10
-; RV32IM-NEXT: sw a2, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s0, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, s0
-; RV32IM-NEXT: sw a2, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, t4
-; RV32IM-NEXT: sw a2, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, t5
-; RV32IM-NEXT: sw a2, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, t6
-; RV32IM-NEXT: sw a2, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s9
-; RV32IM-NEXT: sw a2, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, t2
-; RV32IM-NEXT: sw a2, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s1
-; RV32IM-NEXT: sw a2, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s3
-; RV32IM-NEXT: sw a2, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s4
-; RV32IM-NEXT: sw a2, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s1, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, s1
-; RV32IM-NEXT: sw a2, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s2
-; RV32IM-NEXT: sw a2, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s11
-; RV32IM-NEXT: sw a2, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, t0
-; RV32IM-NEXT: sw a2, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, t1
-; RV32IM-NEXT: sw a2, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, t3
-; RV32IM-NEXT: sw a2, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a3, s5
-; RV32IM-NEXT: sw a2, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mv s10, s5
-; RV32IM-NEXT: lw t0, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, t0
-; RV32IM-NEXT: sw a2, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s2, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, s2
-; RV32IM-NEXT: sw a2, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t2, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, t2
-; RV32IM-NEXT: sw a2, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t3, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, t3
-; RV32IM-NEXT: sw a2, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t5, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, t5
-; RV32IM-NEXT: sw a2, 628(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t6, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, t6
-; RV32IM-NEXT: sw a2, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s3, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, s3
-; RV32IM-NEXT: sw a2, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s4, 224(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, s4
-; RV32IM-NEXT: sw a2, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s5, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a3, s5
-; RV32IM-NEXT: sw a2, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 624(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 372(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 616(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 368(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 364(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 600(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 360(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 356(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 584(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 352(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 348(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 344(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 340(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 336(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 332(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 328(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 464(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 324(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 320(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 432(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a6, a2
-; RV32IM-NEXT: lw a2, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a6, s6
-; RV32IM-NEXT: sw a2, 316(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s9, a6, s7
-; RV32IM-NEXT: mul s11, a6, s8
-; RV32IM-NEXT: mul ra, a6, ra
-; RV32IM-NEXT: lw a2, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: sw a2, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a2, a6, s0
-; RV32IM-NEXT: sw a2, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s8, a6, t4
-; RV32IM-NEXT: lw a2, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a6, a2
-; RV32IM-NEXT: lw t1, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t4, a6, t1
-; RV32IM-NEXT: sw t4, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t1, 304(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t4, a6, t1
-; RV32IM-NEXT: sw t4, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t1, 288(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t1, a6, t1
-; RV32IM-NEXT: sw t1, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t1, 284(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s7, a6, t1
-; RV32IM-NEXT: lw t1, 276(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s6, a6, t1
-; RV32IM-NEXT: lw t1, 272(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t1, a6, t1
-; RV32IM-NEXT: sw t1, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t1, a6, s1
-; RV32IM-NEXT: sw t1, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t1, 280(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t1, a6, t1
-; RV32IM-NEXT: sw t1, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t1, 300(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t1, a6, t1
-; RV32IM-NEXT: sw t1, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t1, 296(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t4, a6, t1
-; RV32IM-NEXT: lw t1, 292(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s0, a6, t1
-; RV32IM-NEXT: lw t1, 268(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t1, a6, t1
-; RV32IM-NEXT: sw t1, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t1, a6, s10
-; RV32IM-NEXT: sw t1, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t1, a6, t0
-; RV32IM-NEXT: sw t1, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t1, a6, s2
-; RV32IM-NEXT: mul s1, a6, t2
-; RV32IM-NEXT: mul s2, a6, t3
-; RV32IM-NEXT: sw s2, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s2, a6, t5
-; RV32IM-NEXT: sw s2, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s2, a6, t6
-; RV32IM-NEXT: sw s2, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s2, a6, s3
-; RV32IM-NEXT: sw s2, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s2, a6, s4
-; RV32IM-NEXT: sw s2, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s2, a6, s5
-; RV32IM-NEXT: sw s2, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli s3, a6, 24
-; RV32IM-NEXT: lw s10, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a6, a6, s10
-; RV32IM-NEXT: slli a6, a6, 8
-; RV32IM-NEXT: or a6, s3, a6
-; RV32IM-NEXT: sw a6, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a6, a5, 8
-; RV32IM-NEXT: and a6, a6, s10
-; RV32IM-NEXT: srli s3, a5, 24
-; RV32IM-NEXT: or a6, a6, s3
-; RV32IM-NEXT: sw a6, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli a6, a5, 24
-; RV32IM-NEXT: and a5, a5, s10
-; RV32IM-NEXT: slli a5, a5, 8
-; RV32IM-NEXT: or a5, a6, a5
-; RV32IM-NEXT: sw a5, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a6, a5
-; RV32IM-NEXT: sw a5, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 652(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and t6, t6, t2
+; RV32IM-NEXT: slli a6, a6, 1
+; RV32IM-NEXT: and s0, s0, t2
+; RV32IM-NEXT: slli a5, a5, 1
+; RV32IM-NEXT: and s1, s1, t2
+; RV32IM-NEXT: slli a7, a7, 1
+; RV32IM-NEXT: or a0, t0, a0
+; RV32IM-NEXT: or a1, t1, a1
+; RV32IM-NEXT: or a2, t3, a3
+; RV32IM-NEXT: or a3, t4, s2
+; RV32IM-NEXT: or s4, t5, a4
+; RV32IM-NEXT: or a4, t6, a6
+; RV32IM-NEXT: or s6, s0, a5
+; RV32IM-NEXT: or s5, s1, a7
+; RV32IM-NEXT: srli a5, a0, 8
+; RV32IM-NEXT: srli a6, a0, 24
+; RV32IM-NEXT: slli a7, a0, 24
+; RV32IM-NEXT: and t0, a0, s11
+; RV32IM-NEXT: srli t1, a1, 8
+; RV32IM-NEXT: srli t3, a1, 24
+; RV32IM-NEXT: slli t5, a1, 24
+; RV32IM-NEXT: and t4, a1, s11
+; RV32IM-NEXT: and a5, a5, s11
+; RV32IM-NEXT: or a5, a5, a6
+; RV32IM-NEXT: sw a5, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mv s10, s3
+; RV32IM-NEXT: and t6, a1, s3
+; RV32IM-NEXT: slli t0, t0, 8
+; RV32IM-NEXT: or a5, a7, t0
+; RV32IM-NEXT: sw a5, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t2, 1
+; RV32IM-NEXT: and a7, a1, t2
+; RV32IM-NEXT: and a5, t1, s11
+; RV32IM-NEXT: slli a6, t4, 8
+; RV32IM-NEXT: or a5, a5, t3
+; RV32IM-NEXT: sw a5, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, a2, t6
+; RV32IM-NEXT: or t4, t5, a6
+; RV32IM-NEXT: mul a6, a2, a7
; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a5, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s1, 64
+; RV32IM-NEXT: and t0, a1, s1
+; RV32IM-NEXT: lui s2, 128
+; RV32IM-NEXT: and s0, a1, s2
+; RV32IM-NEXT: mul a5, a2, t0
+; RV32IM-NEXT: mul a6, a2, s0
; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 392(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a5, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 16384
+; RV32IM-NEXT: lui a6, 32768
+; RV32IM-NEXT: and a5, a1, a5
+; RV32IM-NEXT: sw a5, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s7, 16384
+; RV32IM-NEXT: and a6, a1, a6
+; RV32IM-NEXT: sw a6, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s8, 32768
+; RV32IM-NEXT: mul a5, a2, a5
+; RV32IM-NEXT: mul a6, a2, a6
; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: sw a5, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a6, a3, s3
+; RV32IM-NEXT: mul a6, a0, a6
+; RV32IM-NEXT: and t1, a3, t2
+; RV32IM-NEXT: lui a5, 1
+; RV32IM-NEXT: mul t1, a0, t1
+; RV32IM-NEXT: xor s9, a6, t1
+; RV32IM-NEXT: and a6, a3, s1
+; RV32IM-NEXT: lui t1, 64
+; RV32IM-NEXT: mul a6, a0, a6
+; RV32IM-NEXT: and s1, a3, s2
+; RV32IM-NEXT: lui t3, 128
+; RV32IM-NEXT: mul s1, a0, s1
+; RV32IM-NEXT: xor ra, a6, s1
+; RV32IM-NEXT: and a6, a3, s7
+; RV32IM-NEXT: lui t2, 16384
+; RV32IM-NEXT: mul a6, a0, a6
+; RV32IM-NEXT: and s1, a3, s8
+; RV32IM-NEXT: lui s7, 32768
+; RV32IM-NEXT: mul s1, a0, s1
+; RV32IM-NEXT: xor s3, a6, s1
+; RV32IM-NEXT: mul t6, a0, t6
+; RV32IM-NEXT: mul a7, a0, a7
+; RV32IM-NEXT: xor a6, t6, a7
+; RV32IM-NEXT: sw a6, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a0, t0
+; RV32IM-NEXT: mul t0, a0, s0
+; RV32IM-NEXT: xor a6, a7, t0
+; RV32IM-NEXT: sw a6, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a7, 4096
+; RV32IM-NEXT: lui t6, 8192
+; RV32IM-NEXT: and t0, a1, a7
+; RV32IM-NEXT: lui s8, 4096
+; RV32IM-NEXT: and a6, a1, t6
+; RV32IM-NEXT: sw a6, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t5, 8192
+; RV32IM-NEXT: mul t6, a0, t0
+; RV32IM-NEXT: mul s0, a0, a6
+; RV32IM-NEXT: xor a6, t6, s0
+; RV32IM-NEXT: sw a6, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli t6, s4, 8
+; RV32IM-NEXT: sw s11, 640(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, t6, s11
+; RV32IM-NEXT: srli s0, s4, 24
+; RV32IM-NEXT: or a6, t6, s0
+; RV32IM-NEXT: sw a6, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, s4, s11
+; RV32IM-NEXT: slli t6, t6, 8
+; RV32IM-NEXT: slli s0, s4, 24
+; RV32IM-NEXT: or a6, s0, t6
+; RV32IM-NEXT: sw a6, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli t6, a4, 8
+; RV32IM-NEXT: and t6, t6, s11
+; RV32IM-NEXT: srli s0, a4, 24
+; RV32IM-NEXT: or a6, t6, s0
+; RV32IM-NEXT: sw a6, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a4, s11
+; RV32IM-NEXT: slli t6, t6, 8
+; RV32IM-NEXT: slli s0, a4, 24
+; RV32IM-NEXT: or a6, s0, t6
+; RV32IM-NEXT: sw a6, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, a4, s10
+; RV32IM-NEXT: sw s10, 624(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, a4, a5
+; RV32IM-NEXT: mul t6, s6, a7
+; RV32IM-NEXT: mul s0, s6, s11
+; RV32IM-NEXT: xor a6, t6, s0
+; RV32IM-NEXT: sw a6, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a4, t1
+; RV32IM-NEXT: and s0, a4, t3
+; RV32IM-NEXT: mul s1, s6, t6
+; RV32IM-NEXT: mul s2, s6, s0
+; RV32IM-NEXT: xor a6, s1, s2
+; RV32IM-NEXT: sw a6, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s1, a4, t2
+; RV32IM-NEXT: sw s1, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a6, 16384
+; RV32IM-NEXT: and s7, a4, s7
+; RV32IM-NEXT: sw s7, 620(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t2, 32768
+; RV32IM-NEXT: mul s1, s6, s1
+; RV32IM-NEXT: mul s2, s6, s7
+; RV32IM-NEXT: xor s1, s1, s2
+; RV32IM-NEXT: sw s1, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s1, s5, s10
+; RV32IM-NEXT: mul s1, s4, s1
+; RV32IM-NEXT: and s2, s5, a5
+; RV32IM-NEXT: mul s2, s4, s2
+; RV32IM-NEXT: xor a5, s1, s2
+; RV32IM-NEXT: sw a5, 440(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s1, s5, t1
+; RV32IM-NEXT: mul s1, s4, s1
+; RV32IM-NEXT: and s2, s5, t3
+; RV32IM-NEXT: mul s2, s4, s2
+; RV32IM-NEXT: xor a5, s1, s2
+; RV32IM-NEXT: sw a5, 408(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s1, s5, a6
+; RV32IM-NEXT: mul s1, s4, s1
+; RV32IM-NEXT: and s2, s5, t2
+; RV32IM-NEXT: mul s2, s4, s2
+; RV32IM-NEXT: xor a5, s1, s2
+; RV32IM-NEXT: sw a5, 356(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul s1, s4, a7
+; RV32IM-NEXT: mul s2, s4, s11
+; RV32IM-NEXT: xor a5, s1, s2
+; RV32IM-NEXT: sw a5, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t6, s4, t6
+; RV32IM-NEXT: mul s0, s4, s0
+; RV32IM-NEXT: xor a5, t6, s0
+; RV32IM-NEXT: sw a5, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a4, s8
; RV32IM-NEXT: sw a5, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 380(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 376(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 372(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 368(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a6, a5
-; RV32IM-NEXT: sw a5, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 364(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 360(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 356(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 352(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: and a6, a4, t5
+; RV32IM-NEXT: sw a6, 392(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul s0, s4, a5
+; RV32IM-NEXT: mul s1, s4, a6
+; RV32IM-NEXT: xor s0, s0, s1
+; RV32IM-NEXT: sw s0, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a5, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a6, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a5, a6, a5
+; RV32IM-NEXT: sw a5, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a5, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a5, t4, a5
+; RV32IM-NEXT: sw a5, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 2
+; RV32IM-NEXT: and a6, a1, a5
+; RV32IM-NEXT: sw a6, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul s0, a2, a6
+; RV32IM-NEXT: lw a6, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a6, s0
+; RV32IM-NEXT: sw a6, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a7, 256
+; RV32IM-NEXT: and a6, a1, a7
+; RV32IM-NEXT: sw a6, 372(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul s0, a2, a6
+; RV32IM-NEXT: lw a6, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a6, s0
+; RV32IM-NEXT: sw a6, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a6, 65536
+; RV32IM-NEXT: and t1, a1, a6
+; RV32IM-NEXT: sw t1, 360(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul s0, a2, t1
+; RV32IM-NEXT: lw t1, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, t1, s0
+; RV32IM-NEXT: sw t1, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a3, a5
+; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: xor a5, s9, a5
+; RV32IM-NEXT: sw a5, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a3, a7
+; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: xor a5, ra, a5
+; RV32IM-NEXT: sw a5, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a3, a6
+; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: xor a5, s3, a5
+; RV32IM-NEXT: sw a5, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a1, 2
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: andi a6, a3, 2
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: slli t1, a2, 1
+; RV32IM-NEXT: and a7, a5, t1
+; RV32IM-NEXT: sw a7, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t1, a0, 1
+; RV32IM-NEXT: and a6, a6, t1
+; RV32IM-NEXT: sw a6, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: sw a5, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a1, 4
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: andi a6, a3, 4
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: slli t1, a2, 2
+; RV32IM-NEXT: and a7, a5, t1
+; RV32IM-NEXT: sw a7, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t1, a0, 2
+; RV32IM-NEXT: and a6, a6, t1
+; RV32IM-NEXT: sw a6, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: sw a5, 496(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a1, 8
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: andi a6, a3, 8
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: slli t1, a2, 3
+; RV32IM-NEXT: and a7, a5, t1
+; RV32IM-NEXT: sw a7, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t1, a0, 3
+; RV32IM-NEXT: and a6, a6, t1
+; RV32IM-NEXT: sw a6, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: sw a5, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a1, 16
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: andi a6, a3, 16
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: slli t1, a2, 4
+; RV32IM-NEXT: and a7, a5, t1
+; RV32IM-NEXT: sw a7, 428(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t1, a0, 4
+; RV32IM-NEXT: and a6, a6, t1
+; RV32IM-NEXT: sw a6, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: sw a5, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a1, 32
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: andi a6, a3, 32
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: slli t1, a2, 5
+; RV32IM-NEXT: and a7, a5, t1
+; RV32IM-NEXT: sw a7, 400(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t1, a0, 5
+; RV32IM-NEXT: and a6, a6, t1
+; RV32IM-NEXT: sw a6, 412(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: sw a5, 424(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a1, 64
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: andi a6, a3, 64
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: slli t1, a2, 6
+; RV32IM-NEXT: and a7, a5, t1
+; RV32IM-NEXT: sw a7, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t1, a0, 6
+; RV32IM-NEXT: and a6, a6, t1
+; RV32IM-NEXT: sw a6, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: sw a5, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a1, 128
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: andi a6, a3, 128
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: slli t1, a2, 7
+; RV32IM-NEXT: and a7, a5, t1
+; RV32IM-NEXT: sw a7, 352(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t1, a0, 7
+; RV32IM-NEXT: and a6, a6, t1
+; RV32IM-NEXT: sw a6, 364(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
; RV32IM-NEXT: sw a5, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 348(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 344(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 372(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 340(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 336(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 368(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 332(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 328(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 364(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 324(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 320(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: sw a5, 360(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a5, a3
-; RV32IM-NEXT: sw a3, 356(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 316(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a5, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a5, a3
-; RV32IM-NEXT: sw a3, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 312(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, s9
-; RV32IM-NEXT: sw a3, 352(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a3, s11, ra
-; RV32IM-NEXT: sw a3, 348(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a2, s8, a2
-; RV32IM-NEXT: sw a2, 344(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a2, s7, s6
-; RV32IM-NEXT: sw a2, 340(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a2, t4, s0
-; RV32IM-NEXT: sw a2, 336(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a2, t1, s1
-; RV32IM-NEXT: sw a2, 332(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a3, a0, 8
-; RV32IM-NEXT: and a3, a3, s10
-; RV32IM-NEXT: srli a5, a0, 24
-; RV32IM-NEXT: or a3, a3, a5
-; RV32IM-NEXT: sw a3, 328(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 2
-; RV32IM-NEXT: sw a2, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 1
-; RV32IM-NEXT: sw a2, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 4
-; RV32IM-NEXT: sw a2, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 8
-; RV32IM-NEXT: sw a2, 288(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 16
-; RV32IM-NEXT: sw a2, 284(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 32
-; RV32IM-NEXT: sw a2, 280(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 64
-; RV32IM-NEXT: sw a2, 276(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 128
-; RV32IM-NEXT: sw a2, 272(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 256
-; RV32IM-NEXT: sw a2, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 512
-; RV32IM-NEXT: sw a2, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a4, 1024
-; RV32IM-NEXT: sw a2, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 680(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a3, a4, a2
-; RV32IM-NEXT: sw a3, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t5, 1
-; RV32IM-NEXT: and a3, a4, t5
-; RV32IM-NEXT: sw a3, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t6, 2
-; RV32IM-NEXT: and a3, a4, t6
-; RV32IM-NEXT: sw a3, 216(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s0, 4
-; RV32IM-NEXT: and a3, a4, s0
-; RV32IM-NEXT: sw a3, 156(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s1, 8
-; RV32IM-NEXT: and a3, a4, s1
-; RV32IM-NEXT: sw a3, 144(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui ra, 16
-; RV32IM-NEXT: and a3, a4, ra
-; RV32IM-NEXT: sw a3, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 32
-; RV32IM-NEXT: and a3, a4, a3
-; RV32IM-NEXT: sw a3, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 64
-; RV32IM-NEXT: and a3, a4, a3
-; RV32IM-NEXT: sw a3, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 128
-; RV32IM-NEXT: and a3, a4, a3
-; RV32IM-NEXT: sw a3, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t4, 256
-; RV32IM-NEXT: and a3, a4, t4
-; RV32IM-NEXT: sw a3, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s11, 512
-; RV32IM-NEXT: and a3, a4, s11
-; RV32IM-NEXT: sw a3, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s9, 1024
-; RV32IM-NEXT: and a3, a4, s9
-; RV32IM-NEXT: sw a3, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a1, 256
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: andi a6, a3, 256
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: slli t1, a2, 8
+; RV32IM-NEXT: and a7, a5, t1
+; RV32IM-NEXT: sw a7, 328(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t1, a0, 8
+; RV32IM-NEXT: and a6, a6, t1
+; RV32IM-NEXT: sw a6, 340(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: sw a5, 348(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a1, 512
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: andi a6, a3, 512
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: slli t1, a2, 9
+; RV32IM-NEXT: and a7, a5, t1
+; RV32IM-NEXT: sw a7, 388(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t1, a0, 9
+; RV32IM-NEXT: and a6, a6, t1
+; RV32IM-NEXT: sw a6, 404(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, t1
+; RV32IM-NEXT: sw a5, 416(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a7, 4
+; RV32IM-NEXT: lui t3, 8
+; RV32IM-NEXT: lui t4, 32
+; RV32IM-NEXT: lui s9, 512
+; RV32IM-NEXT: lui t5, 1024
; RV32IM-NEXT: lui s8, 2048
-; RV32IM-NEXT: and a3, a4, s8
-; RV32IM-NEXT: sw a3, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s7, 4096
-; RV32IM-NEXT: and a3, a4, s7
-; RV32IM-NEXT: sw a3, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t3, 8192
-; RV32IM-NEXT: and a3, a4, t3
-; RV32IM-NEXT: sw a3, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t2, 16384
-; RV32IM-NEXT: and a3, a4, t2
-; RV32IM-NEXT: sw a3, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t1, 32768
-; RV32IM-NEXT: and a3, a4, t1
-; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t0, 65536
-; RV32IM-NEXT: and a3, a4, t0
-; RV32IM-NEXT: sw a3, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a6, 131072
-; RV32IM-NEXT: and a3, a4, a6
-; RV32IM-NEXT: sw a3, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s10, 262144
-; RV32IM-NEXT: and a3, a4, s10
-; RV32IM-NEXT: sw a3, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a3, 524288
-; RV32IM-NEXT: and a4, a4, a3
-; RV32IM-NEXT: sw a4, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a7, 2
-; RV32IM-NEXT: sw a4, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a7, 1
-; RV32IM-NEXT: sw a4, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a7, 4
-; RV32IM-NEXT: sw a4, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a7, 8
-; RV32IM-NEXT: sw a4, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a7, 16
-; RV32IM-NEXT: sw a4, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a7, 32
-; RV32IM-NEXT: sw a4, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a7, 64
-; RV32IM-NEXT: sw a4, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a7, 128
-; RV32IM-NEXT: sw a4, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a7, 256
-; RV32IM-NEXT: sw a4, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a7, 512
-; RV32IM-NEXT: andi s6, a7, 1024
-; RV32IM-NEXT: and s5, a7, a2
-; RV32IM-NEXT: and s4, a7, t5
-; RV32IM-NEXT: and s2, a7, t6
-; RV32IM-NEXT: and s3, a7, s0
-; RV32IM-NEXT: and s1, a7, s1
-; RV32IM-NEXT: and s0, a7, ra
-; RV32IM-NEXT: lui a2, 32
-; RV32IM-NEXT: and t6, a7, a2
-; RV32IM-NEXT: lui a2, 64
-; RV32IM-NEXT: and t5, a7, a2
-; RV32IM-NEXT: lui a2, 128
-; RV32IM-NEXT: and ra, a7, a2
-; RV32IM-NEXT: and t4, a7, t4
-; RV32IM-NEXT: and a2, a7, s11
-; RV32IM-NEXT: and s11, a7, s9
-; RV32IM-NEXT: and s9, a7, s8
-; RV32IM-NEXT: and s7, a7, s7
-; RV32IM-NEXT: and s8, a7, t3
-; RV32IM-NEXT: and t3, a7, t2
-; RV32IM-NEXT: and t2, a7, t1
-; RV32IM-NEXT: and t1, a7, t0
-; RV32IM-NEXT: and t0, a7, a6
-; RV32IM-NEXT: and a6, a7, s10
-; RV32IM-NEXT: and s10, a7, a3
-; RV32IM-NEXT: lw a3, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a1, a3
-; RV32IM-NEXT: sw a3, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a1, a3
-; RV32IM-NEXT: sw a3, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a1, a3
-; RV32IM-NEXT: sw a3, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a1, a3
-; RV32IM-NEXT: sw a3, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a1, a3
-; RV32IM-NEXT: sw a3, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a1, a3
-; RV32IM-NEXT: sw a3, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a1, a3
-; RV32IM-NEXT: sw a3, 168(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a1, a3
-; RV32IM-NEXT: sw a3, 212(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, a4
-; RV32IM-NEXT: sw a3, 208(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, a5
-; RV32IM-NEXT: mv a4, a5
-; RV32IM-NEXT: sw a3, 160(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s6
-; RV32IM-NEXT: sw a3, 188(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s5
-; RV32IM-NEXT: sw a3, 204(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s4
-; RV32IM-NEXT: sw a3, 200(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s2
-; RV32IM-NEXT: sw a3, 152(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s3
-; RV32IM-NEXT: sw a3, 184(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s1
-; RV32IM-NEXT: sw a3, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s0
-; RV32IM-NEXT: sw a3, 196(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, t6
-; RV32IM-NEXT: sw a3, 192(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, t5
-; RV32IM-NEXT: sw a3, 148(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, ra
-; RV32IM-NEXT: sw a3, 180(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, t4
-; RV32IM-NEXT: sw a3, 296(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, a2
-; RV32IM-NEXT: sw a3, 320(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s11
-; RV32IM-NEXT: sw a3, 172(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s9
-; RV32IM-NEXT: sw a3, 164(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s7
-; RV32IM-NEXT: sw a3, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, s8
-; RV32IM-NEXT: sw a3, 176(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, t3
-; RV32IM-NEXT: sw a3, 292(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, t2
-; RV32IM-NEXT: sw a3, 316(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, t1
-; RV32IM-NEXT: sw a3, 324(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, t0
-; RV32IM-NEXT: sw a3, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a1, a6
-; RV32IM-NEXT: sw a3, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a1, s10
-; RV32IM-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mv a5, s10
-; RV32IM-NEXT: lw a1, 312(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 288(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 284(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 280(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 276(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 272(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 268(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 276(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 156(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 272(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 288(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 284(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s10, a0, a1
-; RV32IM-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 280(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lui a5, 131072
+; RV32IM-NEXT: lui t6, 262144
+; RV32IM-NEXT: lui s7, 524288
+; RV32IM-NEXT: andi a6, a1, 1
+; RV32IM-NEXT: andi t1, a1, 1024
+; RV32IM-NEXT: sw t1, 320(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t1, a1, a7
+; RV32IM-NEXT: and s0, a1, t3
+; RV32IM-NEXT: lui t2, 16
+; RV32IM-NEXT: and s1, a1, t2
+; RV32IM-NEXT: and s2, a1, t4
+; RV32IM-NEXT: and s3, a1, s9
+; RV32IM-NEXT: and s10, a1, t5
+; RV32IM-NEXT: and s11, a1, s8
+; RV32IM-NEXT: and ra, a1, a5
+; RV32IM-NEXT: and a5, a1, t6
+; RV32IM-NEXT: and a1, a1, s7
+; RV32IM-NEXT: seqz a6, a6
+; RV32IM-NEXT: mul t0, a2, t0
+; RV32IM-NEXT: sw t0, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t0, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul t0, a2, t0
+; RV32IM-NEXT: sw t0, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t0, a3, 1
+; RV32IM-NEXT: seqz t0, t0
+; RV32IM-NEXT: and t6, a3, a7
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 284(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a3, t3
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 336(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a3, t2
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 432(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a3, t4
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 504(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a3, s9
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 280(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a3, t5
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 324(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a3, s8
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 420(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a7, 4096
+; RV32IM-NEXT: and t6, a3, a7
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a7, 8192
+; RV32IM-NEXT: and t6, a3, a7
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 512(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a7, 131072
+; RV32IM-NEXT: and t6, a3, a7
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 272(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a7, 262144
+; RV32IM-NEXT: and t6, a3, a7
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 312(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a3, s7
+; RV32IM-NEXT: mul a7, a0, t6
+; RV32IM-NEXT: sw a7, 384(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul t4, a0, a7
+; RV32IM-NEXT: mul a7, a2, t1
+; RV32IM-NEXT: sw a7, 260(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a0, t1
+; RV32IM-NEXT: sw a7, 268(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a2, s0
+; RV32IM-NEXT: sw a7, 296(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a0, s0
+; RV32IM-NEXT: sw a7, 308(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a2, s1
+; RV32IM-NEXT: sw a7, 368(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a0, s1
+; RV32IM-NEXT: sw a7, 380(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a2, s2
+; RV32IM-NEXT: sw a7, 452(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a0, s2
+; RV32IM-NEXT: sw a7, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 372(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul t1, a0, a7
+; RV32IM-NEXT: mul a7, a2, s3
+; RV32IM-NEXT: sw a7, 252(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a0, s3
+; RV32IM-NEXT: sw a7, 264(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a2, s10
+; RV32IM-NEXT: sw a7, 288(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a0, s10
+; RV32IM-NEXT: sw a7, 300(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a2, s11
+; RV32IM-NEXT: sw a7, 344(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a0, s11
+; RV32IM-NEXT: sw a7, 372(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul t5, a0, a7
+; RV32IM-NEXT: lw a7, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a7, a0, a7
+; RV32IM-NEXT: sw a7, 256(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a7, a0, a7
+; RV32IM-NEXT: sw a7, 292(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a2, ra
+; RV32IM-NEXT: sw a7, 248(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a0, ra
+; RV32IM-NEXT: sw a7, 360(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a2, a5
+; RV32IM-NEXT: sw a7, 276(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: sw a5, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, a2, a1
+; RV32IM-NEXT: sw a5, 332(sp) # 4-byte Folded Spill
; RV32IM-NEXT: mul a1, a0, a1
+; RV32IM-NEXT: sw a1, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi a6, a6, -1
+; RV32IM-NEXT: addi t0, t0, -1
+; RV32IM-NEXT: slli a1, a2, 10
+; RV32IM-NEXT: and a2, a6, a2
+; RV32IM-NEXT: sw a2, 228(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, t0, a0
+; RV32IM-NEXT: sw a2, 240(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, a6, a0
+; RV32IM-NEXT: sw a2, 244(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a0, a0, 10
+; RV32IM-NEXT: andi a2, a3, 1024
+; RV32IM-NEXT: lw a3, 320(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: and a1, a3, a1
; RV32IM-NEXT: sw a1, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a4
-; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s6, a0, s6
-; RV32IM-NEXT: mul a1, a0, s5
-; RV32IM-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, s4
-; RV32IM-NEXT: sw a1, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s2, a0, s2
-; RV32IM-NEXT: mul s3, a0, s3
-; RV32IM-NEXT: mul s1, a0, s1
-; RV32IM-NEXT: mul a1, a0, s0
-; RV32IM-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t6
-; RV32IM-NEXT: sw a1, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t5
-; RV32IM-NEXT: sw a1, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t6, a0, ra
-; RV32IM-NEXT: mul ra, a0, t4
-; RV32IM-NEXT: mul t5, a0, a2
-; RV32IM-NEXT: mul a1, a0, s11
-; RV32IM-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, s9
-; RV32IM-NEXT: sw a1, 156(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s7
-; RV32IM-NEXT: mul a4, a0, s8
-; RV32IM-NEXT: mul t3, a0, t3
-; RV32IM-NEXT: mul a1, a0, t2
-; RV32IM-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t1
-; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t0
-; RV32IM-NEXT: sw a1, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a6
-; RV32IM-NEXT: sw a1, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a5
-; RV32IM-NEXT: sw a1, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli a1, a0, 24
-; RV32IM-NEXT: lw t4, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a0, a0, t4
-; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or a5, a1, a0
-; RV32IM-NEXT: srli a0, a7, 8
-; RV32IM-NEXT: and a0, a0, t4
-; RV32IM-NEXT: srli a2, a7, 24
-; RV32IM-NEXT: or a6, a0, a2
-; RV32IM-NEXT: slli a2, a7, 24
-; RV32IM-NEXT: and a7, a7, t4
-; RV32IM-NEXT: slli a7, a7, 8
-; RV32IM-NEXT: or a2, a2, a7
-; RV32IM-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a1, a0
-; RV32IM-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, a0, a1
-; RV32IM-NEXT: lw a0, 224(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, a0, a1
-; RV32IM-NEXT: lw a0, 212(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, a0, a1
-; RV32IM-NEXT: lw a0, 204(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, a0, a1
-; RV32IM-NEXT: lw a0, 196(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s4, a0, a1
-; RV32IM-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 164(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s5, a0, a1
-; RV32IM-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, a0, a1
-; RV32IM-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, a1, a0
-; RV32IM-NEXT: lw a0, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s9, a0, a1
-; RV32IM-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, a0, a1
-; RV32IM-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: and a2, a2, a0
+; RV32IM-NEXT: sw a2, 316(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a3, a0
+; RV32IM-NEXT: sw a0, 320(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, t4
; RV32IM-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, t1
; RV32IM-NEXT: sw a0, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, s10, a0
-; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, a1, a0
-; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 212(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor s2, s2, s3
-; RV32IM-NEXT: xor t6, t6, ra
-; RV32IM-NEXT: xor a3, a3, a4
-; RV32IM-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, t5
+; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a0, a1
-; RV32IM-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, a1, a0
-; RV32IM-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a3, 2
+; RV32IM-NEXT: and s7, a4, a3
+; RV32IM-NEXT: mul a0, s6, s7
+; RV32IM-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a2, 256
+; RV32IM-NEXT: and a0, a4, a2
+; RV32IM-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s6, a0
+; RV32IM-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a1, 65536
+; RV32IM-NEXT: and a0, a4, a1
+; RV32IM-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s6, a0
+; RV32IM-NEXT: lw a5, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a5, a0
+; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, s5, a3
+; RV32IM-NEXT: mul a0, s4, a0
+; RV32IM-NEXT: lw a3, 440(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a3, a0
+; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, s5, a2
+; RV32IM-NEXT: mul a0, s4, a0
+; RV32IM-NEXT: lw a2, 408(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a2, a0
+; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, s5, a1
+; RV32IM-NEXT: mul a0, s4, a0
+; RV32IM-NEXT: lw a1, 356(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a4, 2
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s5, 2
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a5, s6, 1
+; RV32IM-NEXT: and a5, a0, a5
+; RV32IM-NEXT: sw a5, 220(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a5, s4, 1
+; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: sw a1, 356(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a4, 4
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s5, 4
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a5, s6, 2
+; RV32IM-NEXT: and a5, a0, a5
+; RV32IM-NEXT: sw a5, 204(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a5, s4, 2
+; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: sw a0, 224(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a4, 8
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s5, 8
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a5, s6, 3
+; RV32IM-NEXT: and a5, a0, a5
+; RV32IM-NEXT: sw a5, 172(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a5, s4, 3
+; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: sw a0, 200(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a4, 16
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s5, 16
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a5, s6, 4
+; RV32IM-NEXT: and a5, a0, a5
+; RV32IM-NEXT: sw a5, 156(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a5, s4, 4
+; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: sw a1, 160(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: sw a0, 176(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a4, 32
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s5, 32
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a5, s6, 5
+; RV32IM-NEXT: and a5, a0, a5
+; RV32IM-NEXT: sw a5, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a5, s4, 5
+; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a4, 64
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s5, 64
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a5, s6, 6
+; RV32IM-NEXT: and a5, a0, a5
+; RV32IM-NEXT: sw a5, 192(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a5, s4, 6
+; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: sw a1, 196(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a4, 128
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s5, 128
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a5, s6, 7
+; RV32IM-NEXT: and a5, a0, a5
+; RV32IM-NEXT: sw a5, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a5, s4, 7
+; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a4, 256
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s5, 256
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a5, s6, 8
+; RV32IM-NEXT: and a5, a0, a5
+; RV32IM-NEXT: sw a5, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a5, s4, 8
+; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a4, 512
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s5, 512
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a5, s6, 9
+; RV32IM-NEXT: and a5, a0, a5
+; RV32IM-NEXT: sw a5, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a5, s4, 9
+; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a1, a4, 1
+; RV32IM-NEXT: andi a0, a4, 1024
+; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s10, 4
+; RV32IM-NEXT: and a5, a4, s10
+; RV32IM-NEXT: lui ra, 8
+; RV32IM-NEXT: and t4, a4, ra
+; RV32IM-NEXT: lui t5, 16
+; RV32IM-NEXT: and t6, a4, t5
+; RV32IM-NEXT: lui s9, 32
+; RV32IM-NEXT: and s2, a4, s9
+; RV32IM-NEXT: lui s1, 512
+; RV32IM-NEXT: and a3, a4, s1
+; RV32IM-NEXT: lui s3, 1024
+; RV32IM-NEXT: and a0, a4, s3
+; RV32IM-NEXT: and a2, a4, s8
+; RV32IM-NEXT: lui t0, 131072
+; RV32IM-NEXT: and a6, a4, t0
+; RV32IM-NEXT: lui t1, 262144
+; RV32IM-NEXT: and s0, a4, t1
+; RV32IM-NEXT: lui t2, 524288
+; RV32IM-NEXT: and a4, a4, t2
+; RV32IM-NEXT: seqz s11, a1
+; RV32IM-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a1, s6, a1
+; RV32IM-NEXT: sw a1, 440(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a1, s6, a1
+; RV32IM-NEXT: sw a1, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, s5, 1
+; RV32IM-NEXT: seqz t3, a7
+; RV32IM-NEXT: and a7, s5, s10
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, s5, ra
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, s5, t5
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 188(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, s5, s9
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 392(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, s5, s1
+; RV32IM-NEXT: mul ra, s4, a7
+; RV32IM-NEXT: and a7, s5, s3
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, s5, s8
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 168(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a1, 4096
+; RV32IM-NEXT: and a7, s5, a1
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a1, 8192
+; RV32IM-NEXT: and a7, s5, a1
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 396(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, s5, t0
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, s5, t1
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, s5, t2
+; RV32IM-NEXT: mul a1, s4, a7
+; RV32IM-NEXT: sw a1, 152(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, s4, s7
+; RV32IM-NEXT: mul a7, s6, a5
+; RV32IM-NEXT: mul a5, s4, a5
+; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s6, t4
+; RV32IM-NEXT: sw a5, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s4, t4
+; RV32IM-NEXT: sw a5, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s6, t6
+; RV32IM-NEXT: sw a5, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s4, t6
+; RV32IM-NEXT: sw a5, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s6, s2
+; RV32IM-NEXT: sw a5, 164(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s4, s2
+; RV32IM-NEXT: sw a5, 180(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a5, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul s7, s4, a5
+; RV32IM-NEXT: mul t2, s6, a3
+; RV32IM-NEXT: mul a5, s4, a3
+; RV32IM-NEXT: mul a3, s6, a0
+; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s4, a0
+; RV32IM-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s6, a2
+; RV32IM-NEXT: sw a0, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s4, a2
+; RV32IM-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a0, s4, a0
+; RV32IM-NEXT: lw a2, 620(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a2, s4, a2
+; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 144(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a2, s4, a2
+; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a3, s6, a6
+; RV32IM-NEXT: mul a2, s4, a6
+; RV32IM-NEXT: sw a2, 144(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s6, s0
+; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s4, s0
+; RV32IM-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s6, a4
+; RV32IM-NEXT: sw a2, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s4, a4
+; RV32IM-NEXT: sw a2, 620(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi t1, s11, -1
+; RV32IM-NEXT: addi t0, t3, -1
+; RV32IM-NEXT: slli a4, s6, 10
+; RV32IM-NEXT: and a6, t1, s6
+; RV32IM-NEXT: and t0, t0, s4
+; RV32IM-NEXT: and a2, t1, s4
+; RV32IM-NEXT: sw a2, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli s4, s4, 10
+; RV32IM-NEXT: andi s5, s5, 1024
+; RV32IM-NEXT: lw a2, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: seqz t3, a2
+; RV32IM-NEXT: seqz s5, s5
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: addi s5, s5, -1
+; RV32IM-NEXT: and a2, t3, a4
+; RV32IM-NEXT: sw a2, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, s5, s4
+; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, t3, s4
+; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a2, a1
+; RV32IM-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, s7
+; RV32IM-NEXT: sw a1, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 444(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 356(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 352(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 328(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 348(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 344(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 340(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 336(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 328(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or a0, a5, a0
-; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: or a0, a2, a6
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a2, a7, t1
-; RV32IM-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, t2, a0
-; RV32IM-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, t4, a0
-; RV32IM-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, s0, a0
-; RV32IM-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, s4, a0
-; RV32IM-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s5, a0
-; RV32IM-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s7, a0
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t0, s9
+; RV32IM-NEXT: lw a0, 364(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 340(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s11, a0
-; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 284(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s10, a0
-; RV32IM-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 280(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 272(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s8, a0
-; RV32IM-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, a0, a1
+; RV32IM-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, a0, a1
+; RV32IM-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 348(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s8, a0, a1
+; RV32IM-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor s9, a1, a0
-; RV32IM-NEXT: lw a0, 208(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, a0, s6
-; RV32IM-NEXT: xor s11, s2, s1
-; RV32IM-NEXT: xor ra, t6, t5
-; RV32IM-NEXT: xor s5, a3, t3
-; RV32IM-NEXT: xor a0, a4, s3
-; RV32IM-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s10, a1, a0
+; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, a1, a0
+; RV32IM-NEXT: lw a0, 220(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a6, a0
; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 204(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 172(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, a0, a1
+; RV32IM-NEXT: lw a0, 156(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, a0, s0
+; RV32IM-NEXT: lw a0, 96(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, a0, s1
+; RV32IM-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s2, a0, a7
; RV32IM-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, a1, a0
-; RV32IM-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, a1, a0
-; RV32IM-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, a1, a0
-; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, a1, a0
-; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 464(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, a1, a0
-; RV32IM-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, a1, a0
-; RV32IM-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, a0, a1
-; RV32IM-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s0, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, s0, a0
-; RV32IM-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s1, a0
-; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, a1, a0
-; RV32IM-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, a1, a0
-; RV32IM-NEXT: xor s4, a2, a5
-; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a6, a0
-; RV32IM-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a7, a0
-; RV32IM-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, t1, a0
-; RV32IM-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, a1, a0
-; RV32IM-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, a0, t2
+; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, a0, a3
+; RV32IM-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, t0, a0
+; RV32IM-NEXT: lw a0, 212(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a0, a1
+; RV32IM-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, a0, a1
+; RV32IM-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor t1, a0, a1
-; RV32IM-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a0, a1
+; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a0, ra
+; RV32IM-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, a0, a1
+; RV32IM-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a1, a0
+; RV32IM-NEXT: lw a0, 224(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: lw a1, 176(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 148(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: lw a2, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a3
+; RV32IM-NEXT: lw a3, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a4, a3
+; RV32IM-NEXT: lw a4, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a5
+; RV32IM-NEXT: lw a5, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw ra, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, ra, a5
+; RV32IM-NEXT: lw ra, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, ra, s5
+; RV32IM-NEXT: sw ra, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 388(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 296(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 288(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 276(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, ra, s5
+; RV32IM-NEXT: sw ra, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 412(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 336(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 428(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 324(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 424(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 312(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 520(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 412(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s5, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, s5, s6
+; RV32IM-NEXT: sw s6, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s6, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, s7, s6
+; RV32IM-NEXT: lw s6, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s8, s8, s6
+; RV32IM-NEXT: lw s6, 308(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s9, s9, s6
+; RV32IM-NEXT: lw s6, 300(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s10, s10, s6
+; RV32IM-NEXT: lw s6, 292(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, s11, s6
+; RV32IM-NEXT: lw s5, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s5, t6
+; RV32IM-NEXT: sw t6, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 192(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s0, t6
+; RV32IM-NEXT: sw t6, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s1, t6
+; RV32IM-NEXT: sw t6, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s2, t6
+; RV32IM-NEXT: sw t6, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s3, t6
+; RV32IM-NEXT: sw t6, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s4, t6
+; RV32IM-NEXT: sw t6, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a7, t5, a7
+; RV32IM-NEXT: sw a7, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 196(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t0, a7
+; RV32IM-NEXT: sw a7, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t1, a7
+; RV32IM-NEXT: sw a7, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t2, a7
+; RV32IM-NEXT: sw a7, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t3, a7
+; RV32IM-NEXT: sw a7, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t4, a7
+; RV32IM-NEXT: sw a7, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a6, a0
+; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 208(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: lw a1, 272(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a2, a0
+; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a3, a0
+; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a4, a0
+; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a5, a0
+; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a3, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a2, a3, 4
+; RV32IM-NEXT: lw a0, 648(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a3, a3, a0
+; RV32IM-NEXT: and a2, a2, a0
+; RV32IM-NEXT: slli a3, a3, 4
+; RV32IM-NEXT: or a2, a2, a3
+; RV32IM-NEXT: sw a2, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a3, a1, 4
+; RV32IM-NEXT: and t3, a1, a0
+; RV32IM-NEXT: and a3, a3, a0
+; RV32IM-NEXT: slli t3, t3, 4
+; RV32IM-NEXT: or ra, a3, t3
+; RV32IM-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, a1, a2
+; RV32IM-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 468(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, a2, a1
+; RV32IM-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, a2, a1
+; RV32IM-NEXT: lw a1, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, a2, a1
+; RV32IM-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 448(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: lw a2, 268(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a3, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: lw a3, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 392(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a4, a3
-; RV32IM-NEXT: lw a4, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s9, a4, s9
-; RV32IM-NEXT: lw a4, 144(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, s10, a4
-; RV32IM-NEXT: lw a4, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, s11, a4
-; RV32IM-NEXT: lw a4, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor ra, ra, a4
-; RV32IM-NEXT: lw a4, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, s5, a4
-; RV32IM-NEXT: lw s5, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s5, s5, s6
-; RV32IM-NEXT: sw s5, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s5, 612(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 580(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, s6, s5
-; RV32IM-NEXT: lw s5, 604(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, s7, s5
-; RV32IM-NEXT: lw s5, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, s8, s5
-; RV32IM-NEXT: lw s5, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s5, s5, t2
-; RV32IM-NEXT: lw t2, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t3, t2
-; RV32IM-NEXT: lw t2, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t4, t2
-; RV32IM-NEXT: lw t2, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t5, t2
-; RV32IM-NEXT: lw t5, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, t6, t5
-; RV32IM-NEXT: lw t5, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, s0, t5
-; RV32IM-NEXT: lw t5, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s1, t5
-; RV32IM-NEXT: lw t5, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, s2, t5
-; RV32IM-NEXT: sw t5, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw t5, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, s3, t5
-; RV32IM-NEXT: xor t5, s4, a5
-; RV32IM-NEXT: lw a5, 300(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, a6, a5
-; RV32IM-NEXT: lw a5, 296(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s4, a7, a5
-; RV32IM-NEXT: lw a5, 292(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, a5
-; RV32IM-NEXT: xor a6, t1, a0
-; RV32IM-NEXT: lw a5, 288(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a1, a5
-; RV32IM-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a0
-; RV32IM-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a0
-; RV32IM-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, s9, a0
-; RV32IM-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, s10, a0
-; RV32IM-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s9, s11, a0
-; RV32IM-NEXT: lw a0, 156(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, ra, a0
-; RV32IM-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a0
-; RV32IM-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s10, a1, 4
-; RV32IM-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and s11, a1, a0
-; RV32IM-NEXT: and s10, s10, a0
-; RV32IM-NEXT: slli s11, s11, 4
-; RV32IM-NEXT: or s10, s10, s11
+; RV32IM-NEXT: sw a1, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s2, a1, a2
+; RV32IM-NEXT: lw a1, 316(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, a2, a1
; RV32IM-NEXT: lw a1, 432(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s11, a1, 4
-; RV32IM-NEXT: and ra, a1, a0
-; RV32IM-NEXT: and s11, s11, a0
-; RV32IM-NEXT: mv a1, a0
-; RV32IM-NEXT: slli ra, ra, 4
-; RV32IM-NEXT: or s11, s11, ra
-; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, a0, s6
-; RV32IM-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, s7, a0
-; RV32IM-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, s8, a0
-; RV32IM-NEXT: xor t3, s5, t3
-; RV32IM-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t4, a0
-; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t2, a0
-; RV32IM-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s1, a0
-; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, s2, a0
-; RV32IM-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s5, a0, 4
-; RV32IM-NEXT: and ra, a0, a1
-; RV32IM-NEXT: and s5, s5, a1
-; RV32IM-NEXT: slli ra, ra, 4
-; RV32IM-NEXT: or s5, s5, ra
-; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli ra, a0, 4
-; RV32IM-NEXT: and a0, a0, a1
-; RV32IM-NEXT: and ra, ra, a1
-; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: or a0, ra, a0
-; RV32IM-NEXT: xor t5, t5, s3
+; RV32IM-NEXT: lw s0, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s0, a1
+; RV32IM-NEXT: lw a1, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s1, a1
+; RV32IM-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 412(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: sw a1, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, a1, s7
; RV32IM-NEXT: lw a1, 320(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, s4, a1
-; RV32IM-NEXT: lw a1, 316(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, a1
-; RV32IM-NEXT: xor a1, a6, a5
-; RV32IM-NEXT: lw a5, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a5
-; RV32IM-NEXT: lw a5, 304(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a5
-; RV32IM-NEXT: lw a5, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, s9, a5
-; RV32IM-NEXT: lw a6, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a6
-; RV32IM-NEXT: xor s4, s6, s7
-; RV32IM-NEXT: lw s6, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, s8, s6
-; RV32IM-NEXT: xor t3, t3, t4
-; RV32IM-NEXT: lw t4, 624(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t2, t4
-; RV32IM-NEXT: mv s7, t6
-; RV32IM-NEXT: xor t4, t6, s0
-; RV32IM-NEXT: xor t4, t4, s1
-; RV32IM-NEXT: lw t6, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, s2, t6
-; RV32IM-NEXT: xor t5, t5, s3
-; RV32IM-NEXT: lw a6, 324(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, a6
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 312(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s8, s8, a1
+; RV32IM-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s9, s9, a1
+; RV32IM-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, s10, a1
+; RV32IM-NEXT: sw a1, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, s11, a1
+; RV32IM-NEXT: lw a1, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t3, a1, 4
+; RV32IM-NEXT: and t4, a1, a0
+; RV32IM-NEXT: and t3, t3, a0
+; RV32IM-NEXT: slli t4, t4, 4
+; RV32IM-NEXT: or t5, t3, t4
+; RV32IM-NEXT: lw a1, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t4, a1, 4
+; RV32IM-NEXT: and s10, a1, a0
+; RV32IM-NEXT: and t4, t4, a0
+; RV32IM-NEXT: slli s10, s10, 4
+; RV32IM-NEXT: or t4, t4, s10
+; RV32IM-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a0, a1
+; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, a1, a0
+; RV32IM-NEXT: lw a0, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a1, a0
+; RV32IM-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, a1, a0
+; RV32IM-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a0, a1
+; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a1, a0
+; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a4, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a0
+; RV32IM-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a0
+; RV32IM-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 520(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a0, a3
+; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a0
+; RV32IM-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a0
+; RV32IM-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, s10, a0
+; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 144(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, s10, a0
+; RV32IM-NEXT: xor s5, s6, s5
+; RV32IM-NEXT: lw s6, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, s3, s6
+; RV32IM-NEXT: lw s6, 516(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, s4, s6
+; RV32IM-NEXT: xor s2, s2, t6
+; RV32IM-NEXT: lw t6, 504(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s0, t6
+; RV32IM-NEXT: lw t6, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s1, t6
+; RV32IM-NEXT: xor t6, s7, s8
+; RV32IM-NEXT: lw s6, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, s9, s6
+; RV32IM-NEXT: lw s7, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, s11, s7
+; RV32IM-NEXT: xor t1, t3, t1
+; RV32IM-NEXT: lw t3, 164(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, t2, t3
+; RV32IM-NEXT: lw t3, 440(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, t3
+; RV32IM-NEXT: xor a6, a7, a6
+; RV32IM-NEXT: lw a7, 392(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a7
+; RV32IM-NEXT: lw a7, 216(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: xor a1, a3, a1
+; RV32IM-NEXT: lw a3, 180(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a3
+; RV32IM-NEXT: lw a3, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a3
+; RV32IM-NEXT: xor a3, s5, s3
+; RV32IM-NEXT: lw a7, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, s4, a7
+; RV32IM-NEXT: xor t3, s2, s0
+; RV32IM-NEXT: lw s0, 512(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s1, s0
+; RV32IM-NEXT: lw s1, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s7, s1
+; RV32IM-NEXT: xor t1, t1, t2
+; RV32IM-NEXT: lw t2, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, t2
+; RV32IM-NEXT: xor a6, a6, a4
+; RV32IM-NEXT: lw a4, 396(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a4
+; RV32IM-NEXT: lw a4, 620(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a0, a4
+; RV32IM-NEXT: lw t2, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a0, t2, 2
+; RV32IM-NEXT: lw s3, 628(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and t2, t2, s3
+; RV32IM-NEXT: and a0, a0, s3
+; RV32IM-NEXT: slli t2, t2, 2
+; RV32IM-NEXT: or a0, a0, t2
+; RV32IM-NEXT: srli t2, ra, 2
+; RV32IM-NEXT: and s2, ra, s3
+; RV32IM-NEXT: and t2, t2, s3
+; RV32IM-NEXT: slli s2, s2, 2
+; RV32IM-NEXT: or t2, t2, s2
+; RV32IM-NEXT: xor a3, a3, a7
+; RV32IM-NEXT: xor a7, t3, s0
+; RV32IM-NEXT: xor t3, t6, s6
+; RV32IM-NEXT: lw s0, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, t3, s0
+; RV32IM-NEXT: srli s0, t5, 2
+; RV32IM-NEXT: and t5, t5, s3
+; RV32IM-NEXT: and s0, s0, s3
+; RV32IM-NEXT: slli t5, t5, 2
+; RV32IM-NEXT: or t5, s0, t5
+; RV32IM-NEXT: srli s0, t4, 2
+; RV32IM-NEXT: and t4, t4, s3
+; RV32IM-NEXT: and s0, s0, s3
+; RV32IM-NEXT: mv s2, s3
+; RV32IM-NEXT: slli t4, t4, 2
+; RV32IM-NEXT: or t4, s0, t4
+; RV32IM-NEXT: xor t0, t1, t0
+; RV32IM-NEXT: xor a5, a6, a5
+; RV32IM-NEXT: xor a2, a1, a2
+; RV32IM-NEXT: lw a6, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a2, a6
+; RV32IM-NEXT: lw a2, 612(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: xor a3, a7, t1
-; RV32IM-NEXT: xor a3, a3, a5
-; RV32IM-NEXT: lw a5, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: xor a5, s4, s6
-; RV32IM-NEXT: xor t1, t3, t2
-; RV32IM-NEXT: lw a6, 612(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t4, a6
-; RV32IM-NEXT: lw t3, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t6, t3
-; RV32IM-NEXT: xor t0, t5, t0
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 664(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a7, a3
+; RV32IM-NEXT: lw a7, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t0, a7
+; RV32IM-NEXT: lw t0, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, t0
; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: lw a3, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a4, a3
-; RV32IM-NEXT: srli a4, s10, 2
-; RV32IM-NEXT: lw t6, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a6, s10, t6
-; RV32IM-NEXT: and a4, a4, t6
-; RV32IM-NEXT: slli a6, a6, 2
-; RV32IM-NEXT: or a4, a4, a6
-; RV32IM-NEXT: srli a6, s11, 2
-; RV32IM-NEXT: and t4, s11, t6
-; RV32IM-NEXT: and a6, a6, t6
-; RV32IM-NEXT: slli t4, t4, 2
-; RV32IM-NEXT: or a6, a6, t4
-; RV32IM-NEXT: lw t4, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, t4
-; RV32IM-NEXT: lw t4, 620(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, t4
-; RV32IM-NEXT: srli t4, s5, 2
-; RV32IM-NEXT: and t5, s5, t6
-; RV32IM-NEXT: and t4, t4, t6
-; RV32IM-NEXT: slli t5, t5, 2
-; RV32IM-NEXT: or t4, t4, t5
-; RV32IM-NEXT: srli t5, a0, 2
-; RV32IM-NEXT: and a0, a0, t6
-; RV32IM-NEXT: and t5, t5, t6
-; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: or a0, t5, a0
-; RV32IM-NEXT: lw t5, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, t5
-; RV32IM-NEXT: lw t5, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, t5
-; RV32IM-NEXT: xor a5, t1, a5
-; RV32IM-NEXT: sw a5, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a1, a1, t0
-; RV32IM-NEXT: sw a1, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli a1, s7, 24
-; RV32IM-NEXT: lw t1, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a5, t2, t1
-; RV32IM-NEXT: slli a5, a5, 8
-; RV32IM-NEXT: or a5, a1, a5
-; RV32IM-NEXT: xor a1, t2, t3
-; RV32IM-NEXT: srli t0, t2, 8
-; RV32IM-NEXT: and t0, t0, t1
-; RV32IM-NEXT: srli a1, a1, 24
-; RV32IM-NEXT: or t0, t0, a1
-; RV32IM-NEXT: slli a7, a7, 24
-; RV32IM-NEXT: and a1, a2, t1
-; RV32IM-NEXT: slli a1, a1, 8
-; RV32IM-NEXT: or a7, a7, a1
-; RV32IM-NEXT: xor a3, a2, a3
-; RV32IM-NEXT: srli a2, a2, 8
-; RV32IM-NEXT: and a1, a2, t1
-; RV32IM-NEXT: srli a3, a3, 24
-; RV32IM-NEXT: or a3, a1, a3
-; RV32IM-NEXT: srli a1, a4, 1
-; RV32IM-NEXT: lw t1, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a2, a4, t1
-; RV32IM-NEXT: and a1, a1, t1
-; RV32IM-NEXT: slli a2, a2, 1
-; RV32IM-NEXT: or a1, a1, a2
-; RV32IM-NEXT: srli a2, a6, 1
-; RV32IM-NEXT: and a4, a6, t1
-; RV32IM-NEXT: and a2, a2, t1
-; RV32IM-NEXT: slli a4, a4, 1
-; RV32IM-NEXT: or a2, a2, a4
-; RV32IM-NEXT: or a4, a5, t0
-; RV32IM-NEXT: sw a4, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a4, t4, 1
-; RV32IM-NEXT: and a5, t4, t1
-; RV32IM-NEXT: and a4, a4, t1
-; RV32IM-NEXT: slli a5, a5, 1
-; RV32IM-NEXT: or t5, a4, a5
-; RV32IM-NEXT: srli a4, a0, 1
+; RV32IM-NEXT: sw a2, 620(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a2, a5, a7
+; RV32IM-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli a2, a0, 1
+; RV32IM-NEXT: lw t1, 644(sp) # 4-byte Folded Reload
; RV32IM-NEXT: and a0, a0, t1
-; RV32IM-NEXT: and a4, a4, t1
+; RV32IM-NEXT: and a2, a2, t1
; RV32IM-NEXT: slli a0, a0, 1
-; RV32IM-NEXT: or ra, a4, a0
-; RV32IM-NEXT: or a0, a7, a3
-; RV32IM-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a2, 2
-; RV32IM-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a2, 1
-; RV32IM-NEXT: andi a4, a2, 4
-; RV32IM-NEXT: andi a5, a2, 8
-; RV32IM-NEXT: andi a6, a2, 16
-; RV32IM-NEXT: andi a7, a2, 32
-; RV32IM-NEXT: andi t1, a2, 64
-; RV32IM-NEXT: andi t2, a2, 128
-; RV32IM-NEXT: andi t3, a2, 256
-; RV32IM-NEXT: andi t4, a2, 512
-; RV32IM-NEXT: andi t6, a2, 1024
-; RV32IM-NEXT: lw s0, 680(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a0, a2, s0
-; RV32IM-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s1, 1
-; RV32IM-NEXT: and s1, a2, s1
-; RV32IM-NEXT: lui a0, 2
-; RV32IM-NEXT: and s3, a2, a0
-; RV32IM-NEXT: lui a0, 4
-; RV32IM-NEXT: and s9, a2, a0
-; RV32IM-NEXT: lui a0, 8
-; RV32IM-NEXT: and s10, a2, a0
-; RV32IM-NEXT: lui a0, 16
-; RV32IM-NEXT: and s11, a2, a0
-; RV32IM-NEXT: lui a0, 32
-; RV32IM-NEXT: and t0, a2, a0
-; RV32IM-NEXT: lui a0, 64
-; RV32IM-NEXT: and s2, a2, a0
-; RV32IM-NEXT: lui a0, 128
-; RV32IM-NEXT: and s4, a2, a0
-; RV32IM-NEXT: lui s5, 256
-; RV32IM-NEXT: and s5, a2, s5
-; RV32IM-NEXT: lui s6, 512
-; RV32IM-NEXT: and s6, a2, s6
-; RV32IM-NEXT: lui s7, 1024
-; RV32IM-NEXT: and s7, a2, s7
-; RV32IM-NEXT: lui s8, 2048
-; RV32IM-NEXT: and s8, a2, s8
-; RV32IM-NEXT: lui a0, 4096
-; RV32IM-NEXT: and a0, a2, a0
-; RV32IM-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 8192
-; RV32IM-NEXT: and a0, a2, a0
-; RV32IM-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 16384
-; RV32IM-NEXT: and a0, a2, a0
-; RV32IM-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 32768
-; RV32IM-NEXT: and a0, a2, a0
-; RV32IM-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 65536
-; RV32IM-NEXT: and a0, a2, a0
-; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 131072
-; RV32IM-NEXT: and a0, a2, a0
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 262144
-; RV32IM-NEXT: and a0, a2, a0
+; RV32IM-NEXT: or ra, a2, a0
+; RV32IM-NEXT: srli a0, t2, 1
+; RV32IM-NEXT: and a2, t2, t1
+; RV32IM-NEXT: and a0, a0, t1
+; RV32IM-NEXT: slli a2, a2, 1
+; RV32IM-NEXT: or a0, a0, a2
+; RV32IM-NEXT: slli t6, t6, 24
+; RV32IM-NEXT: lw t2, 640(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a2, t3, t2
+; RV32IM-NEXT: slli a2, a2, 8
+; RV32IM-NEXT: or a5, t6, a2
+; RV32IM-NEXT: xor a2, t3, s1
+; RV32IM-NEXT: srli a3, t3, 8
+; RV32IM-NEXT: and a3, a3, t2
+; RV32IM-NEXT: srli a2, a2, 24
+; RV32IM-NEXT: or a7, a3, a2
+; RV32IM-NEXT: srli a2, t5, 1
+; RV32IM-NEXT: and a3, t5, t1
+; RV32IM-NEXT: and a2, a2, t1
+; RV32IM-NEXT: slli a3, a3, 1
+; RV32IM-NEXT: or a2, a2, a3
+; RV32IM-NEXT: srli a3, t4, 1
+; RV32IM-NEXT: and t0, t4, t1
+; RV32IM-NEXT: and a3, a3, t1
+; RV32IM-NEXT: slli t0, t0, 1
+; RV32IM-NEXT: or a3, a3, t0
+; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: and t0, a6, t2
+; RV32IM-NEXT: slli t0, t0, 8
+; RV32IM-NEXT: or t0, a1, t0
+; RV32IM-NEXT: xor a1, a6, a4
+; RV32IM-NEXT: srli a4, a6, 8
+; RV32IM-NEXT: and a4, a4, t2
+; RV32IM-NEXT: srli a1, a1, 24
+; RV32IM-NEXT: or a4, a4, a1
+; RV32IM-NEXT: or a1, a5, a7
+; RV32IM-NEXT: or a4, t0, a4
+; RV32IM-NEXT: sw a4, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a6, 624(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a4, a0, a6
+; RV32IM-NEXT: mul a4, ra, a4
+; RV32IM-NEXT: lui t2, 1
+; RV32IM-NEXT: and a5, a0, t2
+; RV32IM-NEXT: mul a5, ra, a5
+; RV32IM-NEXT: xor t1, a4, a5
+; RV32IM-NEXT: lui t0, 64
+; RV32IM-NEXT: and a4, a0, t0
+; RV32IM-NEXT: mul a4, ra, a4
+; RV32IM-NEXT: lui t3, 128
+; RV32IM-NEXT: and a5, a0, t3
+; RV32IM-NEXT: mul a5, ra, a5
+; RV32IM-NEXT: xor a7, a4, a5
+; RV32IM-NEXT: lui t4, 4096
+; RV32IM-NEXT: and a4, a0, t4
+; RV32IM-NEXT: mul a4, ra, a4
+; RV32IM-NEXT: lui t5, 8192
+; RV32IM-NEXT: and a5, a0, t5
+; RV32IM-NEXT: mul a5, ra, a5
+; RV32IM-NEXT: xor a4, a4, a5
+; RV32IM-NEXT: and a5, a3, a6
+; RV32IM-NEXT: and a6, a3, t2
+; RV32IM-NEXT: mul a5, a2, a5
+; RV32IM-NEXT: mul a6, a2, a6
+; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: sw a5, 624(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a3, t0
+; RV32IM-NEXT: and a6, a3, t3
+; RV32IM-NEXT: mul a5, a2, a5
+; RV32IM-NEXT: mul a6, a2, a6
+; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: sw a5, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a3, t4
+; RV32IM-NEXT: and a6, a3, t5
+; RV32IM-NEXT: mul a5, a2, a5
+; RV32IM-NEXT: mul a6, a2, a6
+; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: sw a5, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a0, 2
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, ra, 1
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: sw a5, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a0, 4
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, ra, 2
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: sw a5, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a0, 8
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, ra, 3
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: sw a5, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a0, 16
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, ra, 4
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: sw a5, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a0, 32
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, ra, 5
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: sw a5, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a0, 64
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, ra, 6
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: sw a5, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a0, 128
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, ra, 7
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: sw a5, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a0, 256
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, ra, 8
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: sw a5, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a0, 512
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: slli a6, ra, 9
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: sw a5, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a5, a0, 1
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: lui t6, 2
+; RV32IM-NEXT: and a6, a0, t6
+; RV32IM-NEXT: mul a6, ra, a6
+; RV32IM-NEXT: lui t0, 4
+; RV32IM-NEXT: and t0, a0, t0
+; RV32IM-NEXT: mul t0, ra, t0
+; RV32IM-NEXT: sw t0, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t0, 8
+; RV32IM-NEXT: and t0, a0, t0
+; RV32IM-NEXT: mul t0, ra, t0
+; RV32IM-NEXT: sw t0, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t0, 16
+; RV32IM-NEXT: and t0, a0, t0
+; RV32IM-NEXT: mul t0, ra, t0
+; RV32IM-NEXT: sw t0, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t0, 32
+; RV32IM-NEXT: and t0, a0, t0
+; RV32IM-NEXT: mul t0, ra, t0
+; RV32IM-NEXT: sw t0, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s8, 256
+; RV32IM-NEXT: and t0, a0, s8
+; RV32IM-NEXT: mul t0, ra, t0
+; RV32IM-NEXT: lui s7, 512
+; RV32IM-NEXT: and t2, a0, s7
+; RV32IM-NEXT: mul t2, ra, t2
+; RV32IM-NEXT: sw t2, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s11, 1024
+; RV32IM-NEXT: and t2, a0, s11
+; RV32IM-NEXT: mul t2, ra, t2
+; RV32IM-NEXT: sw t2, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s9, 2048
+; RV32IM-NEXT: and t2, a0, s9
+; RV32IM-NEXT: mul t2, ra, t2
+; RV32IM-NEXT: sw t2, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s10, 16384
+; RV32IM-NEXT: and t2, a0, s10
+; RV32IM-NEXT: mul t2, ra, t2
+; RV32IM-NEXT: lui s5, 32768
+; RV32IM-NEXT: and t3, a0, s5
+; RV32IM-NEXT: mul t3, ra, t3
+; RV32IM-NEXT: sw t3, 512(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s4, 65536
+; RV32IM-NEXT: and t3, a0, s4
+; RV32IM-NEXT: mul t3, ra, t3
+; RV32IM-NEXT: sw t3, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s3, 131072
+; RV32IM-NEXT: and t3, a0, s3
+; RV32IM-NEXT: mul t3, ra, t3
+; RV32IM-NEXT: sw t3, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t4, 262144
+; RV32IM-NEXT: and t3, a0, t4
+; RV32IM-NEXT: mul t3, ra, t3
+; RV32IM-NEXT: sw t3, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t5, 524288
+; RV32IM-NEXT: and t3, a0, t5
+; RV32IM-NEXT: mul t3, ra, t3
+; RV32IM-NEXT: sw t3, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: and a5, a5, ra
+; RV32IM-NEXT: sw a5, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli ra, ra, 10
+; RV32IM-NEXT: andi a0, a0, 1024
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: and a0, a0, ra
; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 524288
-; RV32IM-NEXT: and a2, a2, a0
-; RV32IM-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a3
-; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a4
-; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a5
-; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a6
-; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a7
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t1
-; RV32IM-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t2
-; RV32IM-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t3
-; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t4
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t6
-; RV32IM-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s1
-; RV32IM-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s3
-; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s9
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s10
-; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s11
-; RV32IM-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t0
-; RV32IM-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s2
-; RV32IM-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s4
-; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s5
-; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s6
-; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s7
-; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s8
-; RV32IM-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, a2
-; RV32IM-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s1, ra, s0
-; RV32IM-NEXT: lui a0, 1
-; RV32IM-NEXT: and a0, ra, a0
-; RV32IM-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 2
-; RV32IM-NEXT: and a0, ra, a0
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 4
-; RV32IM-NEXT: and a0, ra, a0
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 8
-; RV32IM-NEXT: and a0, ra, a0
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 16
-; RV32IM-NEXT: and a0, ra, a0
+; RV32IM-NEXT: xor a0, t1, a6
; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 32
-; RV32IM-NEXT: and a0, ra, a0
-; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 64
-; RV32IM-NEXT: and s2, ra, a0
-; RV32IM-NEXT: lui a0, 128
-; RV32IM-NEXT: and s4, ra, a0
-; RV32IM-NEXT: lui a0, 256
-; RV32IM-NEXT: and s5, ra, a0
-; RV32IM-NEXT: lui a0, 512
-; RV32IM-NEXT: and s6, ra, a0
-; RV32IM-NEXT: lui a0, 1024
-; RV32IM-NEXT: and s7, ra, a0
-; RV32IM-NEXT: lui a0, 2048
-; RV32IM-NEXT: and s8, ra, a0
-; RV32IM-NEXT: lui a0, 4096
-; RV32IM-NEXT: and s9, ra, a0
-; RV32IM-NEXT: lui a0, 8192
-; RV32IM-NEXT: and s10, ra, a0
-; RV32IM-NEXT: lui a0, 16384
-; RV32IM-NEXT: and s11, ra, a0
-; RV32IM-NEXT: lui a0, 32768
-; RV32IM-NEXT: and a0, ra, a0
-; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 65536
-; RV32IM-NEXT: and a0, ra, a0
-; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 131072
-; RV32IM-NEXT: and a0, ra, a0
-; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 262144
-; RV32IM-NEXT: and a0, ra, a0
-; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 524288
-; RV32IM-NEXT: and t2, ra, a0
-; RV32IM-NEXT: andi t3, ra, 2
-; RV32IM-NEXT: andi s0, ra, 1
-; RV32IM-NEXT: andi a0, ra, 4
-; RV32IM-NEXT: andi a1, ra, 8
-; RV32IM-NEXT: andi a2, ra, 16
-; RV32IM-NEXT: andi a3, ra, 32
-; RV32IM-NEXT: andi a4, ra, 64
-; RV32IM-NEXT: andi a5, ra, 128
-; RV32IM-NEXT: andi a6, ra, 256
-; RV32IM-NEXT: andi a7, ra, 512
-; RV32IM-NEXT: andi ra, ra, 1024
-; RV32IM-NEXT: mul t0, t5, t3
-; RV32IM-NEXT: sw t0, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t0, t5, s0
-; RV32IM-NEXT: sw t0, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, t5, a0
-; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t6, t5, a1
-; RV32IM-NEXT: mul s3, t5, a2
-; RV32IM-NEXT: mul t4, t5, a3
-; RV32IM-NEXT: mul a0, t5, a4
-; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, t5, a5
-; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s0, t5, a6
-; RV32IM-NEXT: mul t1, t5, a7
-; RV32IM-NEXT: mul a0, t5, ra
-; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, t5, s1
+; RV32IM-NEXT: xor a0, a7, t0
; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 680(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t5, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s1, t5, a0
-; RV32IM-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t0, t5, a0
-; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul ra, t5, a0
-; RV32IM-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t5, a0
+; RV32IM-NEXT: xor a0, a4, t2
; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t5, a0
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, t5, s2
-; RV32IM-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s2, t5, s4
-; RV32IM-NEXT: mul t3, t5, s5
-; RV32IM-NEXT: mul s6, t5, s6
-; RV32IM-NEXT: mul s7, t5, s7
-; RV32IM-NEXT: mul a0, t5, s8
-; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s8, t5, s9
-; RV32IM-NEXT: mul s9, t5, s10
-; RV32IM-NEXT: mul s10, t5, s11
-; RV32IM-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s11, t5, a0
-; RV32IM-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s4, t5, a0
-; RV32IM-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t5, a0
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, t5, a0
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, t5, t2
+; RV32IM-NEXT: srli a0, a1, 4
+; RV32IM-NEXT: lw a4, 648(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a1, a1, a4
+; RV32IM-NEXT: and a0, a0, a4
+; RV32IM-NEXT: slli a1, a1, 4
+; RV32IM-NEXT: or a0, a0, a1
+; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 2
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 1
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 4
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli s1, a2, 2
+; RV32IM-NEXT: and a0, a0, s1
+; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 8
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli s0, a2, 3
+; RV32IM-NEXT: and a0, a0, s0
+; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 16
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 4
+; RV32IM-NEXT: and a0, a0, a1
; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 32
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 5
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 64
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 6
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 128
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 7
+; RV32IM-NEXT: and ra, a0, a1
+; RV32IM-NEXT: andi a0, a3, 256
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a4, a2, 8
+; RV32IM-NEXT: and s6, a0, a4
+; RV32IM-NEXT: andi a0, a3, 512
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 9
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a3, t6
+; RV32IM-NEXT: lui a1, 4
+; RV32IM-NEXT: and a1, a3, a1
+; RV32IM-NEXT: lui a5, 8
+; RV32IM-NEXT: and a5, a3, a5
+; RV32IM-NEXT: lui a0, 16
+; RV32IM-NEXT: and a7, a3, a0
+; RV32IM-NEXT: lui a0, 32
+; RV32IM-NEXT: and t1, a3, a0
+; RV32IM-NEXT: and t2, a3, s8
+; RV32IM-NEXT: and s7, a3, s7
+; RV32IM-NEXT: and s8, a3, s11
+; RV32IM-NEXT: and s9, a3, s9
+; RV32IM-NEXT: and a4, a3, s10
+; RV32IM-NEXT: and a6, a3, s5
+; RV32IM-NEXT: and t0, a3, s4
+; RV32IM-NEXT: and t3, a3, s3
+; RV32IM-NEXT: and t4, a3, t4
+; RV32IM-NEXT: and t5, a3, t5
+; RV32IM-NEXT: andi s3, a3, 1
+; RV32IM-NEXT: seqz a0, s3
+; RV32IM-NEXT: mul t6, a2, t6
+; RV32IM-NEXT: mul s1, a2, a1
+; RV32IM-NEXT: mul s4, a2, a5
+; RV32IM-NEXT: mul s11, a2, a7
+; RV32IM-NEXT: mul a1, a2, t1
+; RV32IM-NEXT: sw a1, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t1, a2, t2
+; RV32IM-NEXT: mul s7, a2, s7
+; RV32IM-NEXT: mul s8, a2, s8
+; RV32IM-NEXT: mul s10, a2, s9
+; RV32IM-NEXT: mul s9, a2, a4
+; RV32IM-NEXT: mul s0, a2, a6
+; RV32IM-NEXT: mul s3, a2, t0
+; RV32IM-NEXT: mul s5, a2, t3
+; RV32IM-NEXT: mul a1, a2, t4
+; RV32IM-NEXT: sw a1, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, a2, t5
+; RV32IM-NEXT: sw a1, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: and a7, a0, a2
+; RV32IM-NEXT: slli a2, a2, 10
+; RV32IM-NEXT: andi a3, a3, 1024
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and t3, a3, a2
+; RV32IM-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a0, t6
+; RV32IM-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, a0, t1
+; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, a0, s9
+; RV32IM-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t6, a0, 4
+; RV32IM-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and s9, a0, a1
+; RV32IM-NEXT: and t6, t6, a1
+; RV32IM-NEXT: slli s9, s9, 4
+; RV32IM-NEXT: or t6, t6, s9
+; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s9, a1, a0
+; RV32IM-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a0, a1
; RV32IM-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, a1, a0
-; RV32IM-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a0, a1
-; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a0, a1
+; RV32IM-NEXT: xor t0, a0, a1
; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 544(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a2, a0, a2
-; RV32IM-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a3, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a0, a3
-; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a0, a4
+; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 504(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a0
; RV32IM-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a5, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a0, a5
-; RV32IM-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli a0, a1, 4
-; RV32IM-NEXT: lw s5, 684(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a1, a1, s5
-; RV32IM-NEXT: and a0, a0, s5
-; RV32IM-NEXT: slli a1, a1, 4
-; RV32IM-NEXT: or a0, a0, a1
+; RV32IM-NEXT: lw a4, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a0
+; RV32IM-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a0
+; RV32IM-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, a0
+; RV32IM-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw t5, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, t5, a1
-; RV32IM-NEXT: lw t5, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, t5, t6
-; RV32IM-NEXT: xor t4, s3, t4
-; RV32IM-NEXT: xor t1, s0, t1
-; RV32IM-NEXT: xor t0, s1, t0
-; RV32IM-NEXT: xor t3, s2, t3
-; RV32IM-NEXT: xor s0, s8, s9
-; RV32IM-NEXT: lw s2, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s1, s2, 4
-; RV32IM-NEXT: and s2, s2, s5
-; RV32IM-NEXT: and s1, s1, s5
-; RV32IM-NEXT: slli s2, s2, 4
-; RV32IM-NEXT: or s1, s1, s2
-; RV32IM-NEXT: xor a6, t2, a6
-; RV32IM-NEXT: lw t2, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, t2
-; RV32IM-NEXT: lw t2, 600(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t2
-; RV32IM-NEXT: lw t2, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t2
-; RV32IM-NEXT: lw t2, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, t2
-; RV32IM-NEXT: lw t2, 584(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, t2
-; RV32IM-NEXT: xor a1, a1, t6
-; RV32IM-NEXT: lw t2, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t4, t2
-; RV32IM-NEXT: lw t4, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, t4
-; RV32IM-NEXT: xor t0, t0, ra
-; RV32IM-NEXT: xor t3, t3, s6
-; RV32IM-NEXT: xor t4, s0, s10
-; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: lw a7, 624(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a7
-; RV32IM-NEXT: lw a7, 616(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a7
-; RV32IM-NEXT: lw a7, 604(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a7
-; RV32IM-NEXT: lw a7, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a7
-; RV32IM-NEXT: xor a1, a1, t2
-; RV32IM-NEXT: lw a7, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, t1, a7
-; RV32IM-NEXT: lw t1, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, t1
-; RV32IM-NEXT: xor t1, t3, s7
-; RV32IM-NEXT: xor t2, t4, s11
-; RV32IM-NEXT: lw t3, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, t3
-; RV32IM-NEXT: lw t3, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t3
-; RV32IM-NEXT: lw t3, 628(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t3
-; RV32IM-NEXT: lw t3, 620(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, t3
-; RV32IM-NEXT: lw t3, 612(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, t3
-; RV32IM-NEXT: srli t3, a0, 2
-; RV32IM-NEXT: lw t6, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a0, a0, t6
-; RV32IM-NEXT: and t3, t3, t6
-; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: or a0, t3, a0
-; RV32IM-NEXT: lw t3, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, t3
-; RV32IM-NEXT: lw t3, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, t3
-; RV32IM-NEXT: lw t3, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, t3
-; RV32IM-NEXT: lw t3, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, t3
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw t4, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, t4
+; RV32IM-NEXT: xor s6, ra, s6
+; RV32IM-NEXT: xor t2, t2, s1
+; RV32IM-NEXT: xor t1, t1, s7
+; RV32IM-NEXT: xor t5, t5, s0
+; RV32IM-NEXT: xor a6, s9, a6
+; RV32IM-NEXT: lw s0, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, s0
+; RV32IM-NEXT: lw s0, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, s0
+; RV32IM-NEXT: lw s0, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, s0
+; RV32IM-NEXT: lw s0, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, s0
+; RV32IM-NEXT: lw s0, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, s0
+; RV32IM-NEXT: xor a0, a7, a0
+; RV32IM-NEXT: lw a7, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a7
+; RV32IM-NEXT: lw a7, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, s6, a7
; RV32IM-NEXT: xor t2, t2, s4
-; RV32IM-NEXT: srli t3, s1, 2
-; RV32IM-NEXT: and t4, s1, t6
-; RV32IM-NEXT: and t3, t3, t6
-; RV32IM-NEXT: slli t4, t4, 2
-; RV32IM-NEXT: or t3, t3, t4
-; RV32IM-NEXT: lw t4, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t4
-; RV32IM-NEXT: lw t4, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, t4
-; RV32IM-NEXT: lw t4, 680(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, t4
-; RV32IM-NEXT: lw t4, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t2, t4
-; RV32IM-NEXT: xor a2, a6, a2
-; RV32IM-NEXT: xor a2, a2, a3
-; RV32IM-NEXT: lw a3, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a5, a3
-; RV32IM-NEXT: xor a5, a1, a7
+; RV32IM-NEXT: xor t1, t1, s8
+; RV32IM-NEXT: xor t5, t5, s3
+; RV32IM-NEXT: xor a6, a6, t0
+; RV32IM-NEXT: lw t0, 520(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, t0
+; RV32IM-NEXT: lw t0, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, t0
+; RV32IM-NEXT: lw t0, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, t0
+; RV32IM-NEXT: lw t0, 540(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a5, a5, t0
-; RV32IM-NEXT: lw a7, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, t2, a7
-; RV32IM-NEXT: xor t0, a2, a4
+; RV32IM-NEXT: lw s0, 516(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t0, s0, 2
+; RV32IM-NEXT: and s0, s0, s2
+; RV32IM-NEXT: and t0, t0, s2
+; RV32IM-NEXT: slli s0, s0, 2
+; RV32IM-NEXT: or t0, t0, s0
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: xor a1, a7, t3
+; RV32IM-NEXT: xor a7, t2, s11
+; RV32IM-NEXT: xor t1, t1, s10
+; RV32IM-NEXT: xor t2, t5, s5
+; RV32IM-NEXT: srli t3, t6, 2
+; RV32IM-NEXT: and t5, t6, s2
+; RV32IM-NEXT: and t3, t3, s2
+; RV32IM-NEXT: slli t5, t5, 2
+; RV32IM-NEXT: or t3, t3, t5
+; RV32IM-NEXT: xor a2, a6, a2
+; RV32IM-NEXT: lw a6, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a6
+; RV32IM-NEXT: lw a6, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: xor a6, a0, a1
+; RV32IM-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, a0
+; RV32IM-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, t2, a0
+; RV32IM-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a1
+; RV32IM-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a0, a1
+; RV32IM-NEXT: srli a0, t0, 1
+; RV32IM-NEXT: xor a3, a2, a3
+; RV32IM-NEXT: xor a4, a3, a4
+; RV32IM-NEXT: srli a1, t3, 1
+; RV32IM-NEXT: lw t4, 644(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a3, t0, t4
+; RV32IM-NEXT: and t0, t3, t4
+; RV32IM-NEXT: slli t3, a2, 24
+; RV32IM-NEXT: xor a2, a6, a7
+; RV32IM-NEXT: xor a7, a2, t1
+; RV32IM-NEXT: and a2, a0, t4
+; RV32IM-NEXT: slli a3, a3, 1
+; RV32IM-NEXT: or a2, a2, a3
+; RV32IM-NEXT: and a3, a1, t4
+; RV32IM-NEXT: slli t0, t0, 1
+; RV32IM-NEXT: or a3, a3, t0
+; RV32IM-NEXT: lw t1, 640(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and t0, a4, t1
+; RV32IM-NEXT: slli t0, t0, 8
+; RV32IM-NEXT: or t0, t3, t0
; RV32IM-NEXT: slli a6, a6, 24
-; RV32IM-NEXT: lw a2, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, a3, a2
-; RV32IM-NEXT: srli a3, a0, 1
-; RV32IM-NEXT: xor t1, a5, t1
-; RV32IM-NEXT: srli a2, t3, 1
-; RV32IM-NEXT: lw t5, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a0, a0, t5
-; RV32IM-NEXT: and a5, t3, t5
-; RV32IM-NEXT: lw a4, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, a4
-; RV32IM-NEXT: and a4, a3, t5
-; RV32IM-NEXT: slli a0, a0, 1
-; RV32IM-NEXT: or a4, a4, a0
-; RV32IM-NEXT: and a0, a2, t5
-; RV32IM-NEXT: slli a5, a5, 1
-; RV32IM-NEXT: or a5, a0, a5
-; RV32IM-NEXT: lw t3, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a0, t0, t3
-; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or a0, a6, a0
-; RV32IM-NEXT: slli a1, a1, 24
-; RV32IM-NEXT: xor a6, t0, t2
-; RV32IM-NEXT: srli t0, t0, 8
-; RV32IM-NEXT: and t0, t0, t3
+; RV32IM-NEXT: xor a5, a4, a5
+; RV32IM-NEXT: srli a4, a4, 8
+; RV32IM-NEXT: and a4, a4, t1
+; RV32IM-NEXT: srli a5, a5, 24
+; RV32IM-NEXT: or a4, a4, a5
+; RV32IM-NEXT: and a5, a7, t1
+; RV32IM-NEXT: slli a5, a5, 8
+; RV32IM-NEXT: or a5, a6, a5
+; RV32IM-NEXT: xor a6, a7, t2
+; RV32IM-NEXT: srli a7, a7, 8
+; RV32IM-NEXT: and a7, a7, t1
+; RV32IM-NEXT: mv t2, t1
; RV32IM-NEXT: srli a6, a6, 24
-; RV32IM-NEXT: or a6, t0, a6
-; RV32IM-NEXT: and t0, t1, t3
-; RV32IM-NEXT: slli t0, t0, 8
-; RV32IM-NEXT: or a1, a1, t0
-; RV32IM-NEXT: xor a7, t1, a7
-; RV32IM-NEXT: srli t0, t1, 8
-; RV32IM-NEXT: and t0, t0, t3
-; RV32IM-NEXT: srli a7, a7, 24
-; RV32IM-NEXT: or a7, t0, a7
-; RV32IM-NEXT: or a6, a0, a6
-; RV32IM-NEXT: lui a0, 349525
-; RV32IM-NEXT: addi a0, a0, 1364
-; RV32IM-NEXT: or a1, a1, a7
-; RV32IM-NEXT: srli a7, a6, 4
-; RV32IM-NEXT: and a6, a6, s5
-; RV32IM-NEXT: and a7, a7, s5
-; RV32IM-NEXT: slli a6, a6, 4
; RV32IM-NEXT: or a6, a7, a6
-; RV32IM-NEXT: srli a7, a1, 4
-; RV32IM-NEXT: and a1, a1, s5
-; RV32IM-NEXT: and a7, a7, s5
-; RV32IM-NEXT: slli a1, a1, 4
-; RV32IM-NEXT: or a1, a7, a1
+; RV32IM-NEXT: or a7, t0, a4
+; RV32IM-NEXT: lui a4, 349525
+; RV32IM-NEXT: addi a4, a4, 1364
+; RV32IM-NEXT: or a5, a5, a6
+; RV32IM-NEXT: srli a6, a7, 4
+; RV32IM-NEXT: lw t3, 648(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a7, a7, t3
+; RV32IM-NEXT: and a6, a6, t3
+; RV32IM-NEXT: slli a7, a7, 4
+; RV32IM-NEXT: or a6, a6, a7
+; RV32IM-NEXT: srli a7, a5, 4
+; RV32IM-NEXT: and a5, a5, t3
+; RV32IM-NEXT: and a7, a7, t3
+; RV32IM-NEXT: slli a5, a5, 4
+; RV32IM-NEXT: or a5, a7, a5
; RV32IM-NEXT: srli a7, a6, 2
-; RV32IM-NEXT: and a6, a6, t6
-; RV32IM-NEXT: and a7, a7, t6
+; RV32IM-NEXT: and a6, a6, s2
+; RV32IM-NEXT: and a7, a7, s2
; RV32IM-NEXT: slli a6, a6, 2
; RV32IM-NEXT: or a6, a7, a6
-; RV32IM-NEXT: srli a7, a1, 2
-; RV32IM-NEXT: and a1, a1, t6
-; RV32IM-NEXT: and a7, a7, t6
-; RV32IM-NEXT: slli a1, a1, 2
-; RV32IM-NEXT: or a1, a7, a1
+; RV32IM-NEXT: srli a7, a5, 2
+; RV32IM-NEXT: and a5, a5, s2
+; RV32IM-NEXT: and a7, a7, s2
+; RV32IM-NEXT: slli a5, a5, 2
+; RV32IM-NEXT: or a5, a7, a5
; RV32IM-NEXT: srli a7, a6, 1
-; RV32IM-NEXT: and a6, a6, t5
-; RV32IM-NEXT: and a7, a7, a0
+; RV32IM-NEXT: and a6, a6, t4
+; RV32IM-NEXT: and a7, a7, a4
; RV32IM-NEXT: slli a6, a6, 1
; RV32IM-NEXT: or a6, a7, a6
-; RV32IM-NEXT: srli a7, a1, 1
-; RV32IM-NEXT: and a1, a1, t5
-; RV32IM-NEXT: and a7, a7, a0
-; RV32IM-NEXT: slli a1, a1, 1
-; RV32IM-NEXT: or a1, a7, a1
+; RV32IM-NEXT: srli a7, a5, 1
+; RV32IM-NEXT: and a5, a5, t4
+; RV32IM-NEXT: and a7, a7, a4
+; RV32IM-NEXT: slli a5, a5, 1
+; RV32IM-NEXT: or a5, a7, a5
; RV32IM-NEXT: srli a6, a6, 1
-; RV32IM-NEXT: srli a1, a1, 1
-; RV32IM-NEXT: lw a7, 676(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a5, a5, 1
+; RV32IM-NEXT: lw a7, 620(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: lw a7, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a7
+; RV32IM-NEXT: lw a7, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a7
; RV32IM-NEXT: srli a7, a6, 8
; RV32IM-NEXT: srli t0, a6, 24
; RV32IM-NEXT: slli t1, a6, 24
-; RV32IM-NEXT: and a6, a6, t3
-; RV32IM-NEXT: and a7, a7, t3
+; RV32IM-NEXT: and a6, a6, t2
+; RV32IM-NEXT: and a7, a7, t2
; RV32IM-NEXT: or a7, a7, t0
-; RV32IM-NEXT: srli t0, a1, 8
+; RV32IM-NEXT: srli t0, a5, 8
; RV32IM-NEXT: slli a6, a6, 8
; RV32IM-NEXT: or a6, t1, a6
-; RV32IM-NEXT: srli t1, a1, 24
-; RV32IM-NEXT: and t0, t0, t3
+; RV32IM-NEXT: srli t1, a5, 24
+; RV32IM-NEXT: and t0, t0, t2
; RV32IM-NEXT: or t0, t0, t1
-; RV32IM-NEXT: and t1, a1, t3
-; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: and t1, a5, t2
+; RV32IM-NEXT: slli a5, a5, 24
; RV32IM-NEXT: slli t1, t1, 8
-; RV32IM-NEXT: or a1, a1, t1
+; RV32IM-NEXT: or a5, a5, t1
; RV32IM-NEXT: or a6, a6, a7
-; RV32IM-NEXT: or a1, a1, t0
+; RV32IM-NEXT: or a5, a5, t0
; RV32IM-NEXT: srli a7, a6, 4
-; RV32IM-NEXT: and a6, a6, s5
-; RV32IM-NEXT: srli t0, a1, 4
-; RV32IM-NEXT: and a1, a1, s5
-; RV32IM-NEXT: and a7, a7, s5
-; RV32IM-NEXT: and t0, t0, s5
+; RV32IM-NEXT: and a6, a6, t3
+; RV32IM-NEXT: srli t0, a5, 4
+; RV32IM-NEXT: and a5, a5, t3
+; RV32IM-NEXT: and a7, a7, t3
+; RV32IM-NEXT: and t0, t0, t3
; RV32IM-NEXT: slli a6, a6, 4
-; RV32IM-NEXT: slli a1, a1, 4
+; RV32IM-NEXT: slli a5, a5, 4
; RV32IM-NEXT: or a6, a7, a6
-; RV32IM-NEXT: or a1, t0, a1
+; RV32IM-NEXT: or a5, t0, a5
; RV32IM-NEXT: srli a7, a6, 2
-; RV32IM-NEXT: and a6, a6, t6
-; RV32IM-NEXT: srli t0, a1, 2
-; RV32IM-NEXT: and a1, a1, t6
-; RV32IM-NEXT: and a7, a7, t6
-; RV32IM-NEXT: and t0, t0, t6
+; RV32IM-NEXT: and a6, a6, s2
+; RV32IM-NEXT: srli t0, a5, 2
+; RV32IM-NEXT: and a5, a5, s2
+; RV32IM-NEXT: and a7, a7, s2
+; RV32IM-NEXT: and t0, t0, s2
; RV32IM-NEXT: slli a6, a6, 2
; RV32IM-NEXT: or a6, a7, a6
-; RV32IM-NEXT: slli a1, a1, 2
-; RV32IM-NEXT: or a1, t0, a1
+; RV32IM-NEXT: slli a5, a5, 2
+; RV32IM-NEXT: or a5, t0, a5
; RV32IM-NEXT: srli a7, a6, 1
-; RV32IM-NEXT: and a6, a6, t5
-; RV32IM-NEXT: and t0, a1, t5
-; RV32IM-NEXT: srli a1, a1, 1
-; RV32IM-NEXT: and a7, a7, a0
-; RV32IM-NEXT: and a0, a1, a0
+; RV32IM-NEXT: and a6, a6, t4
+; RV32IM-NEXT: and t0, a5, t4
+; RV32IM-NEXT: srli a5, a5, 1
+; RV32IM-NEXT: and a7, a7, a4
+; RV32IM-NEXT: and a4, a5, a4
; RV32IM-NEXT: slli a6, a6, 1
-; RV32IM-NEXT: or a1, a7, a6
+; RV32IM-NEXT: or a5, a7, a6
; RV32IM-NEXT: slli t0, t0, 1
-; RV32IM-NEXT: or a0, a0, t0
-; RV32IM-NEXT: slli a3, a3, 31
-; RV32IM-NEXT: srli a1, a1, 1
-; RV32IM-NEXT: or a1, a1, a3
-; RV32IM-NEXT: slli a2, a2, 31
-; RV32IM-NEXT: srli a0, a0, 1
-; RV32IM-NEXT: or a0, a0, a2
-; RV32IM-NEXT: srli a4, a4, 1
+; RV32IM-NEXT: or a4, a4, t0
+; RV32IM-NEXT: slli a0, a0, 31
; RV32IM-NEXT: srli a5, a5, 1
-; RV32IM-NEXT: lw a2, 640(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: sw a1, 0(a2)
-; RV32IM-NEXT: sw a4, 4(a2)
-; RV32IM-NEXT: sw a0, 8(a2)
-; RV32IM-NEXT: sw a5, 12(a2)
-; RV32IM-NEXT: lw a2, 644(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: sw a1, 0(a2)
-; RV32IM-NEXT: sw a4, 4(a2)
-; RV32IM-NEXT: sw a0, 8(a2)
-; RV32IM-NEXT: sw a5, 12(a2)
-; RV32IM-NEXT: lw ra, 748(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s0, 744(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 740(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s2, 736(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s3, 732(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s4, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s5, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s8, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s9, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s10, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s11, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 752
+; RV32IM-NEXT: or a0, a5, a0
+; RV32IM-NEXT: slli a1, a1, 31
+; RV32IM-NEXT: srli a4, a4, 1
+; RV32IM-NEXT: or a1, a4, a1
+; RV32IM-NEXT: srli a2, a2, 1
+; RV32IM-NEXT: srli a3, a3, 1
+; RV32IM-NEXT: lw a4, 632(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 0(a4)
+; RV32IM-NEXT: sw a2, 4(a4)
+; RV32IM-NEXT: sw a1, 8(a4)
+; RV32IM-NEXT: sw a3, 12(a4)
+; RV32IM-NEXT: lw a4, 636(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 0(a4)
+; RV32IM-NEXT: sw a2, 4(a4)
+; RV32IM-NEXT: sw a1, 8(a4)
+; RV32IM-NEXT: sw a3, 12(a4)
+; RV32IM-NEXT: lw ra, 700(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 696(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 692(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s2, 688(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s3, 684(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s4, 680(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 676(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s6, 672(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s7, 668(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s8, 664(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s9, 660(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 656(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s11, 652(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 704
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: commutative_clmulh_v2i64:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: addi sp, sp, -1200
-; RV64IM-NEXT: sd ra, 1192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s0, 1184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s1, 1176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s2, 1168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s3, 1160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s4, 1152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s5, 1144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s6, 1136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s7, 1128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s8, 1120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s9, 1112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s10, 1104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s11, 1096(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd a5, 984(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd a4, 976(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: srli a7, a2, 24
-; RV64IM-NEXT: lui s9, 4080
-; RV64IM-NEXT: srli t0, a2, 8
+; RV64IM-NEXT: addi sp, sp, -960
+; RV64IM-NEXT: sd ra, 952(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s0, 944(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 936(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s2, 928(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s3, 920(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s4, 912(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s5, 904(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s6, 896(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s7, 888(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s8, 880(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s9, 872(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s10, 864(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s11, 856(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a5, 824(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a4, 816(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mv s6, a1
+; RV64IM-NEXT: mv t6, a0
+; RV64IM-NEXT: srli t0, a2, 24
+; RV64IM-NEXT: lui t5, 4080
+; RV64IM-NEXT: srli t1, a2, 8
; RV64IM-NEXT: li t4, 255
-; RV64IM-NEXT: srli a4, a2, 40
-; RV64IM-NEXT: lui s10, 16
-; RV64IM-NEXT: srli a5, a2, 56
-; RV64IM-NEXT: srliw t1, a2, 24
-; RV64IM-NEXT: slli a6, a2, 56
-; RV64IM-NEXT: sd a6, 1088(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui t6, 61681
-; RV64IM-NEXT: lui s0, 209715
-; RV64IM-NEXT: lui s11, 349525
-; RV64IM-NEXT: srli s2, a0, 24
-; RV64IM-NEXT: srli t2, a0, 8
-; RV64IM-NEXT: srli a6, a0, 40
-; RV64IM-NEXT: srli t3, a0, 56
-; RV64IM-NEXT: srliw t5, a0, 24
-; RV64IM-NEXT: slli s4, a0, 56
-; RV64IM-NEXT: srli s6, a3, 24
-; RV64IM-NEXT: srli s8, a3, 8
+; RV64IM-NEXT: srli t2, a2, 40
+; RV64IM-NEXT: lui a0, 16
+; RV64IM-NEXT: srli s0, a2, 56
+; RV64IM-NEXT: srliw a6, a2, 24
+; RV64IM-NEXT: slli a1, a2, 56
+; RV64IM-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui t3, 61681
+; RV64IM-NEXT: lui s3, 209715
+; RV64IM-NEXT: lui s2, 349525
+; RV64IM-NEXT: srli s4, t6, 24
+; RV64IM-NEXT: srli s1, t6, 8
+; RV64IM-NEXT: srli a4, t6, 40
+; RV64IM-NEXT: srli a5, t6, 56
+; RV64IM-NEXT: srliw a7, t6, 24
+; RV64IM-NEXT: slli a1, t6, 56
+; RV64IM-NEXT: sd a1, 840(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: srli s7, a3, 24
+; RV64IM-NEXT: srli s9, a3, 8
; RV64IM-NEXT: srli s5, a3, 40
-; RV64IM-NEXT: srli s7, a3, 56
-; RV64IM-NEXT: and a7, a7, s9
-; RV64IM-NEXT: slli s1, t4, 24
-; RV64IM-NEXT: and t0, t0, s1
-; RV64IM-NEXT: or a7, t0, a7
-; RV64IM-NEXT: addi t0, s10, -256
-; RV64IM-NEXT: and a4, a4, t0
+; RV64IM-NEXT: srli s8, a3, 56
+; RV64IM-NEXT: and t0, t0, t5
+; RV64IM-NEXT: slli t4, t4, 24
+; RV64IM-NEXT: and t1, t1, t4
+; RV64IM-NEXT: or t0, t1, t0
+; RV64IM-NEXT: addi a0, a0, -256
+; RV64IM-NEXT: and t1, t2, a0
+; RV64IM-NEXT: or a1, t1, s0
+; RV64IM-NEXT: and t1, a2, t5
+; RV64IM-NEXT: slli a6, a6, 32
+; RV64IM-NEXT: addi t2, t3, -241
+; RV64IM-NEXT: addi t3, s3, 819
+; RV64IM-NEXT: addi s0, s2, 1365
+; RV64IM-NEXT: slli t1, t1, 24
+; RV64IM-NEXT: or t1, t1, a6
+; RV64IM-NEXT: slli a6, t2, 32
+; RV64IM-NEXT: add s10, t2, a6
+; RV64IM-NEXT: slli a6, t3, 32
+; RV64IM-NEXT: add s11, t3, a6
+; RV64IM-NEXT: slli a6, s0, 32
+; RV64IM-NEXT: add ra, s0, a6
+; RV64IM-NEXT: srliw t2, a3, 24
+; RV64IM-NEXT: and a6, s4, t5
+; RV64IM-NEXT: and t3, s1, t4
+; RV64IM-NEXT: or a6, t3, a6
+; RV64IM-NEXT: srli t3, s6, 24
+; RV64IM-NEXT: and a4, a4, a0
; RV64IM-NEXT: or a5, a4, a5
-; RV64IM-NEXT: and a4, a2, s9
-; RV64IM-NEXT: slli t1, t1, 32
-; RV64IM-NEXT: addi t4, t6, -241
-; RV64IM-NEXT: addi t6, s0, 819
-; RV64IM-NEXT: addi s0, s11, 1365
+; RV64IM-NEXT: and a4, t6, t5
+; RV64IM-NEXT: slli a7, a7, 32
; RV64IM-NEXT: slli a4, a4, 24
-; RV64IM-NEXT: or s3, a4, t1
-; RV64IM-NEXT: slli a4, t4, 32
-; RV64IM-NEXT: add ra, t4, a4
-; RV64IM-NEXT: slli a4, t6, 32
-; RV64IM-NEXT: add s11, t6, a4
-; RV64IM-NEXT: slli a4, s0, 32
-; RV64IM-NEXT: add s10, s0, a4
-; RV64IM-NEXT: srliw a4, a3, 24
-; RV64IM-NEXT: and t1, s2, s9
-; RV64IM-NEXT: and t2, t2, s1
-; RV64IM-NEXT: or t1, t2, t1
-; RV64IM-NEXT: srli t2, a1, 24
-; RV64IM-NEXT: and a6, a6, t0
-; RV64IM-NEXT: or t3, a6, t3
-; RV64IM-NEXT: and a6, a0, s9
-; RV64IM-NEXT: slli t5, t5, 32
-; RV64IM-NEXT: slli a6, a6, 24
-; RV64IM-NEXT: or a6, a6, t5
-; RV64IM-NEXT: srli t4, a1, 8
-; RV64IM-NEXT: and t5, s6, s9
-; RV64IM-NEXT: mv s2, s1
-; RV64IM-NEXT: sd s1, 968(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and t6, s8, s1
-; RV64IM-NEXT: or t5, t6, t5
-; RV64IM-NEXT: srli t6, a1, 40
-; RV64IM-NEXT: and s0, s5, t0
-; RV64IM-NEXT: or s0, s0, s7
-; RV64IM-NEXT: and s1, a3, s9
-; RV64IM-NEXT: slli a4, a4, 32
-; RV64IM-NEXT: slli s1, s1, 24
-; RV64IM-NEXT: or s1, s1, a4
-; RV64IM-NEXT: srli a4, a1, 56
-; RV64IM-NEXT: and t2, t2, s9
-; RV64IM-NEXT: and t4, t4, s2
-; RV64IM-NEXT: or t2, t4, t2
-; RV64IM-NEXT: srliw t4, a1, 24
-; RV64IM-NEXT: mv s2, t0
-; RV64IM-NEXT: and t6, t6, t0
-; RV64IM-NEXT: or t6, t6, a4
-; RV64IM-NEXT: and a4, a1, s9
-; RV64IM-NEXT: slli t4, t4, 32
-; RV64IM-NEXT: slli a4, a4, 24
-; RV64IM-NEXT: or t4, a4, t4
-; RV64IM-NEXT: li a4, 1
-; RV64IM-NEXT: or a7, a7, a5
-; RV64IM-NEXT: slli t0, a3, 56
-; RV64IM-NEXT: and a2, a2, s2
-; RV64IM-NEXT: mv a5, s2
-; RV64IM-NEXT: sd s2, 952(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: or a4, a4, a7
+; RV64IM-NEXT: srli a7, s6, 8
+; RV64IM-NEXT: and s0, s7, t5
+; RV64IM-NEXT: sd t4, 808(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s1, s9, t4
+; RV64IM-NEXT: or s1, s1, s0
+; RV64IM-NEXT: srli s0, s6, 40
+; RV64IM-NEXT: and s2, s5, a0
+; RV64IM-NEXT: or s3, s2, s8
+; RV64IM-NEXT: and s2, a3, t5
+; RV64IM-NEXT: slli t2, t2, 32
+; RV64IM-NEXT: slli s2, s2, 24
+; RV64IM-NEXT: or t2, s2, t2
+; RV64IM-NEXT: srli s2, s6, 56
+; RV64IM-NEXT: and t3, t3, t5
+; RV64IM-NEXT: and a7, a7, t4
+; RV64IM-NEXT: or a7, a7, t3
+; RV64IM-NEXT: srliw t3, s6, 24
+; RV64IM-NEXT: mv s8, a0
+; RV64IM-NEXT: sd a0, 784(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, s0, a0
+; RV64IM-NEXT: or s4, s0, s2
+; RV64IM-NEXT: and s0, s6, t5
+; RV64IM-NEXT: slli t3, t3, 32
+; RV64IM-NEXT: slli s0, s0, 24
+; RV64IM-NEXT: or t3, s0, t3
+; RV64IM-NEXT: li s0, 1
+; RV64IM-NEXT: or s5, t0, a1
+; RV64IM-NEXT: lui s2, 1
+; RV64IM-NEXT: and a2, a2, a0
; RV64IM-NEXT: slli a2, a2, 40
-; RV64IM-NEXT: ld s2, 1088(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: or a2, s2, a2
-; RV64IM-NEXT: slli s2, a1, 56
-; RV64IM-NEXT: or t1, t1, t3
-; RV64IM-NEXT: slli t3, a4, 11
-; RV64IM-NEXT: sd t3, 992(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a0, a0, a5
+; RV64IM-NEXT: ld a0, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: or a2, a0, a2
+; RV64IM-NEXT: lui t0, 64
+; RV64IM-NEXT: or t5, a6, a5
+; RV64IM-NEXT: lui t4, 128
+; RV64IM-NEXT: and a0, t6, s8
; RV64IM-NEXT: slli a0, a0, 40
-; RV64IM-NEXT: or a0, s4, a0
-; RV64IM-NEXT: slli t3, a4, 31
-; RV64IM-NEXT: sd t3, 1016(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or t3, t5, s0
-; RV64IM-NEXT: slli t5, a4, 32
-; RV64IM-NEXT: sd t5, 1048(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a3, a3, a5
+; RV64IM-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: or a0, a1, a0
+; RV64IM-NEXT: lui s7, 4096
+; RV64IM-NEXT: or t6, s1, s3
+; RV64IM-NEXT: slli a6, a3, 56
+; RV64IM-NEXT: and a3, a3, s8
; RV64IM-NEXT: slli a3, a3, 40
-; RV64IM-NEXT: or a3, t0, a3
-; RV64IM-NEXT: slli t0, a4, 33
-; RV64IM-NEXT: sd t0, 1008(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or t0, t2, t6
-; RV64IM-NEXT: slli t2, a4, 34
-; RV64IM-NEXT: sd t2, 1032(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a1, a5
+; RV64IM-NEXT: or s1, a6, a3
+; RV64IM-NEXT: lui a6, 8192
+; RV64IM-NEXT: or a7, a7, s4
+; RV64IM-NEXT: slli a3, s6, 56
+; RV64IM-NEXT: and a1, s6, s8
; RV64IM-NEXT: slli a1, a1, 40
-; RV64IM-NEXT: or a1, s2, a1
-; RV64IM-NEXT: slli a5, a4, 35
-; RV64IM-NEXT: sd a5, 1000(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a2, a2, s3
-; RV64IM-NEXT: slli a5, a4, 36
-; RV64IM-NEXT: sd a5, 1024(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a0, a0, a6
-; RV64IM-NEXT: slli a5, a4, 37
-; RV64IM-NEXT: sd a5, 1040(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a3, a3, s1
-; RV64IM-NEXT: slli a5, a4, 38
-; RV64IM-NEXT: sd a5, 1056(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a1, a1, t4
-; RV64IM-NEXT: slli a5, a4, 39
-; RV64IM-NEXT: sd a5, 1064(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a2, a2, a7
-; RV64IM-NEXT: slli a5, a4, 40
-; RV64IM-NEXT: sd a5, 1080(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a0, a0, t1
-; RV64IM-NEXT: slli a5, a4, 41
-; RV64IM-NEXT: sd a5, 1072(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a3, a3, t3
-; RV64IM-NEXT: slli a5, a4, 42
-; RV64IM-NEXT: sd a5, 1088(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a1, a1, t0
-; RV64IM-NEXT: srli a5, a2, 4
-; RV64IM-NEXT: sd ra, 960(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a2, a2, ra
-; RV64IM-NEXT: and a5, a5, ra
-; RV64IM-NEXT: slli a2, a2, 4
-; RV64IM-NEXT: or a2, a5, a2
-; RV64IM-NEXT: srli a5, a0, 4
-; RV64IM-NEXT: and a0, a0, ra
-; RV64IM-NEXT: and a5, a5, ra
+; RV64IM-NEXT: or a1, a3, a1
+; RV64IM-NEXT: slli s3, s0, 11
+; RV64IM-NEXT: or t1, a2, t1
+; RV64IM-NEXT: slli a3, s0, 33
+; RV64IM-NEXT: or a0, a0, a4
+; RV64IM-NEXT: slli a4, s0, 34
+; RV64IM-NEXT: or t2, s1, t2
+; RV64IM-NEXT: slli a2, s0, 40
+; RV64IM-NEXT: or a1, a1, t3
+; RV64IM-NEXT: or t1, t1, s5
+; RV64IM-NEXT: or a0, a0, t5
+; RV64IM-NEXT: or t2, t2, t6
+; RV64IM-NEXT: or a1, a1, a7
+; RV64IM-NEXT: srli a7, t1, 4
+; RV64IM-NEXT: sd s10, 800(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t1, t1, s10
+; RV64IM-NEXT: srli t3, a0, 4
+; RV64IM-NEXT: and a0, a0, s10
+; RV64IM-NEXT: srli t5, t2, 4
+; RV64IM-NEXT: and t2, t2, s10
+; RV64IM-NEXT: srli t6, a1, 4
+; RV64IM-NEXT: and a1, a1, s10
+; RV64IM-NEXT: and a7, a7, s10
+; RV64IM-NEXT: slli t1, t1, 4
+; RV64IM-NEXT: and t3, t3, s10
; RV64IM-NEXT: slli a0, a0, 4
-; RV64IM-NEXT: or a0, a5, a0
-; RV64IM-NEXT: srli a5, a3, 4
-; RV64IM-NEXT: and a3, a3, ra
-; RV64IM-NEXT: and a5, a5, ra
-; RV64IM-NEXT: slli a3, a3, 4
-; RV64IM-NEXT: or a3, a5, a3
-; RV64IM-NEXT: srli a5, a1, 4
-; RV64IM-NEXT: and a1, a1, ra
-; RV64IM-NEXT: and a5, a5, ra
+; RV64IM-NEXT: and t5, t5, s10
+; RV64IM-NEXT: slli t2, t2, 4
+; RV64IM-NEXT: and t6, t6, s10
; RV64IM-NEXT: slli a1, a1, 4
-; RV64IM-NEXT: or a1, a5, a1
-; RV64IM-NEXT: srli a5, a2, 2
-; RV64IM-NEXT: sd s11, 944(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a2, a2, s11
-; RV64IM-NEXT: and a5, a5, s11
-; RV64IM-NEXT: slli a2, a2, 2
-; RV64IM-NEXT: or a2, a5, a2
-; RV64IM-NEXT: srli a5, a0, 2
+; RV64IM-NEXT: or a7, a7, t1
+; RV64IM-NEXT: or a0, t3, a0
+; RV64IM-NEXT: or t1, t5, t2
+; RV64IM-NEXT: or a1, t6, a1
+; RV64IM-NEXT: srli t2, a7, 2
+; RV64IM-NEXT: sd s11, 792(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, a7, s11
+; RV64IM-NEXT: srli t3, a0, 2
; RV64IM-NEXT: and a0, a0, s11
-; RV64IM-NEXT: and a5, a5, s11
-; RV64IM-NEXT: slli a0, a0, 2
-; RV64IM-NEXT: or a0, a5, a0
-; RV64IM-NEXT: srli a5, a3, 2
-; RV64IM-NEXT: and a3, a3, s11
-; RV64IM-NEXT: and a5, a5, s11
-; RV64IM-NEXT: slli a3, a3, 2
-; RV64IM-NEXT: or a5, a5, a3
-; RV64IM-NEXT: srli a3, a1, 2
+; RV64IM-NEXT: srli t5, t1, 2
+; RV64IM-NEXT: and t1, t1, s11
+; RV64IM-NEXT: srli t6, a1, 2
; RV64IM-NEXT: and a1, a1, s11
-; RV64IM-NEXT: and a3, a3, s11
+; RV64IM-NEXT: and t2, t2, s11
+; RV64IM-NEXT: slli a7, a7, 2
+; RV64IM-NEXT: and t3, t3, s11
+; RV64IM-NEXT: slli a0, a0, 2
+; RV64IM-NEXT: and t5, t5, s11
+; RV64IM-NEXT: slli t1, t1, 2
+; RV64IM-NEXT: and t6, t6, s11
; RV64IM-NEXT: slli a1, a1, 2
-; RV64IM-NEXT: or a1, a3, a1
-; RV64IM-NEXT: srli a3, a2, 1
-; RV64IM-NEXT: sd s10, 936(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a2, a2, s10
-; RV64IM-NEXT: and a3, a3, s10
-; RV64IM-NEXT: slli a2, a2, 1
-; RV64IM-NEXT: or a3, a3, a2
-; RV64IM-NEXT: srli a2, a0, 1
-; RV64IM-NEXT: and a0, a0, s10
-; RV64IM-NEXT: and a2, a2, s10
+; RV64IM-NEXT: or a7, t2, a7
+; RV64IM-NEXT: or a0, t3, a0
+; RV64IM-NEXT: or t1, t5, t1
+; RV64IM-NEXT: or a1, t6, a1
+; RV64IM-NEXT: srli t2, a7, 1
+; RV64IM-NEXT: sd ra, 776(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, a7, ra
+; RV64IM-NEXT: srli t3, a0, 1
+; RV64IM-NEXT: and a0, a0, ra
+; RV64IM-NEXT: srli t5, t1, 1
+; RV64IM-NEXT: and t1, t1, ra
+; RV64IM-NEXT: srli t6, a1, 1
+; RV64IM-NEXT: and a1, a1, ra
+; RV64IM-NEXT: and t2, t2, ra
+; RV64IM-NEXT: slli a7, a7, 1
+; RV64IM-NEXT: and t3, t3, ra
; RV64IM-NEXT: slli a0, a0, 1
-; RV64IM-NEXT: or a6, a2, a0
-; RV64IM-NEXT: srli a0, a5, 1
-; RV64IM-NEXT: and a2, a5, s10
-; RV64IM-NEXT: and a0, a0, s10
-; RV64IM-NEXT: slli a2, a2, 1
-; RV64IM-NEXT: or a0, a0, a2
-; RV64IM-NEXT: srli a2, a1, 1
-; RV64IM-NEXT: and a1, a1, s10
-; RV64IM-NEXT: and a2, a2, s10
+; RV64IM-NEXT: and s1, t5, ra
+; RV64IM-NEXT: slli t1, t1, 1
+; RV64IM-NEXT: and s4, t6, ra
; RV64IM-NEXT: slli a1, a1, 1
-; RV64IM-NEXT: or s11, a2, a1
-; RV64IM-NEXT: slli s9, a4, 43
-; RV64IM-NEXT: sd s9, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s8, a4, 44
-; RV64IM-NEXT: sd s8, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s7, a4, 45
-; RV64IM-NEXT: sd s7, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s6, a4, 46
-; RV64IM-NEXT: sd s6, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s5, a4, 47
-; RV64IM-NEXT: sd s5, 448(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s4, a4, 48
-; RV64IM-NEXT: slli s3, a4, 49
-; RV64IM-NEXT: slli s2, a4, 50
-; RV64IM-NEXT: slli s1, a4, 51
-; RV64IM-NEXT: slli s0, a4, 52
-; RV64IM-NEXT: slli t6, a4, 53
-; RV64IM-NEXT: slli t5, a4, 54
-; RV64IM-NEXT: slli t4, a4, 55
-; RV64IM-NEXT: slli t3, a4, 56
-; RV64IM-NEXT: slli t2, a4, 57
-; RV64IM-NEXT: slli t1, a4, 58
-; RV64IM-NEXT: slli t0, a4, 59
-; RV64IM-NEXT: slli a7, a4, 60
-; RV64IM-NEXT: slli a5, a4, 61
-; RV64IM-NEXT: slli a4, a4, 62
-; RV64IM-NEXT: li a1, -1
-; RV64IM-NEXT: slli a2, a1, 63
-; RV64IM-NEXT: lui ra, 4
-; RV64IM-NEXT: lui s10, 8
-; RV64IM-NEXT: andi a1, a6, 2
-; RV64IM-NEXT: sd a1, 928(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 1
-; RV64IM-NEXT: sd a1, 920(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 4
-; RV64IM-NEXT: sd a1, 912(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 8
-; RV64IM-NEXT: sd a1, 904(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 16
-; RV64IM-NEXT: sd a1, 896(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 32
-; RV64IM-NEXT: sd a1, 888(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 64
-; RV64IM-NEXT: sd a1, 880(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 128
-; RV64IM-NEXT: sd a1, 872(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 256
-; RV64IM-NEXT: sd a1, 864(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 512
-; RV64IM-NEXT: sd a1, 856(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 1024
-; RV64IM-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 992(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 840(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 1
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 832(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 2
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 824(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, ra
-; RV64IM-NEXT: sd a1, 816(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s10
-; RV64IM-NEXT: sd a1, 808(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 16
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 800(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 32
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 792(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 64
-; RV64IM-NEXT: and s10, a6, a1
-; RV64IM-NEXT: lui a1, 128
-; RV64IM-NEXT: and ra, a6, a1
-; RV64IM-NEXT: lui a1, 256
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 784(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 512
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 768(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 1024
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 760(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 2048
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 736(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 4096
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 728(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 8192
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 720(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 16384
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 704(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 32768
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 688(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 65536
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 672(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 131072
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 656(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 262144
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 600(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1016(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 592(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1048(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 584(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1008(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 568(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1032(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 528(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1000(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 504(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1024(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 496(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1040(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 488(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1056(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 480(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1064(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 472(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1080(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 464(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1072(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 456(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1088(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s9
-; RV64IM-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s8
-; RV64IM-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s7
-; RV64IM-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s6
-; RV64IM-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s5
-; RV64IM-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s4
+; RV64IM-NEXT: or t5, t2, a7
+; RV64IM-NEXT: or t6, t3, a0
+; RV64IM-NEXT: or a0, s1, t1
+; RV64IM-NEXT: or a5, s4, a1
+; RV64IM-NEXT: and a7, t6, s3
+; RV64IM-NEXT: and t1, t6, s2
+; RV64IM-NEXT: and t2, t6, t0
+; RV64IM-NEXT: and t3, t6, t4
+; RV64IM-NEXT: and s1, t6, s7
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: mul t1, t5, t1
+; RV64IM-NEXT: xor a1, a7, t1
; RV64IM-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s9, s4
-; RV64IM-NEXT: and a1, a6, s3
+; RV64IM-NEXT: and a7, t6, a6
+; RV64IM-NEXT: mul t1, t5, t2
+; RV64IM-NEXT: mul t2, t5, t3
+; RV64IM-NEXT: xor a1, t1, t2
; RV64IM-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s8, s3
-; RV64IM-NEXT: and a1, a6, s2
+; RV64IM-NEXT: and t1, t6, a3
+; RV64IM-NEXT: mul t2, t5, s1
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: xor a1, t2, a7
+; RV64IM-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, t6, a4
+; RV64IM-NEXT: mul t1, t5, t1
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: xor a1, t1, a7
; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s7, s2
-; RV64IM-NEXT: and a1, a6, s1
+; RV64IM-NEXT: and a7, t6, a2
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: slli s1, s0, 41
+; RV64IM-NEXT: and t1, t6, s1
+; RV64IM-NEXT: mul t1, t5, t1
+; RV64IM-NEXT: xor a1, a7, t1
; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s6, s1
-; RV64IM-NEXT: and a1, a6, s0
+; RV64IM-NEXT: slli s4, s0, 48
+; RV64IM-NEXT: and a7, t6, s4
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: slli s6, s0, 49
+; RV64IM-NEXT: and t1, t6, s6
+; RV64IM-NEXT: mul t1, t5, t1
+; RV64IM-NEXT: xor a1, a7, t1
+; RV64IM-NEXT: sd a1, 744(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a7, s0, 56
+; RV64IM-NEXT: and t1, t6, a7
+; RV64IM-NEXT: mul t2, t5, t1
+; RV64IM-NEXT: slli t1, s0, 57
+; RV64IM-NEXT: and t3, t6, t1
+; RV64IM-NEXT: mul t3, t5, t3
+; RV64IM-NEXT: xor a1, t2, t3
+; RV64IM-NEXT: sd a1, 728(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t2, a5, s3
+; RV64IM-NEXT: and t3, a5, s2
+; RV64IM-NEXT: mul t2, a0, t2
+; RV64IM-NEXT: mul t3, a0, t3
+; RV64IM-NEXT: xor a1, t2, t3
+; RV64IM-NEXT: sd a1, 768(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t0, a5, t0
+; RV64IM-NEXT: and t2, a5, t4
+; RV64IM-NEXT: mul t0, a0, t0
+; RV64IM-NEXT: mul t2, a0, t2
+; RV64IM-NEXT: xor a1, t0, t2
+; RV64IM-NEXT: sd a1, 760(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a1, a5, s7
+; RV64IM-NEXT: and a6, a5, a6
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: xor a1, a1, a6
+; RV64IM-NEXT: sd a1, 752(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a3, a5, a3
+; RV64IM-NEXT: and a4, a5, a4
+; RV64IM-NEXT: mul a3, a0, a3
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: xor a3, a3, a4
+; RV64IM-NEXT: sd a3, 736(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a5, a2
+; RV64IM-NEXT: and s1, a5, s1
+; RV64IM-NEXT: mul a2, a0, a2
+; RV64IM-NEXT: mul a3, a0, s1
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: sd a2, 720(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a5, s4
+; RV64IM-NEXT: and a3, a5, s6
+; RV64IM-NEXT: mul a2, a0, a2
+; RV64IM-NEXT: mul a3, a0, a3
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: sd a2, 712(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a5, a7
+; RV64IM-NEXT: and a3, a5, t1
+; RV64IM-NEXT: mul a2, a0, a2
+; RV64IM-NEXT: mul a3, a0, a3
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: sd a2, 704(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 2
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 1
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 696(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 4
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 2
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 680(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 8
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 3
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 672(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 16
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 4
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 664(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 32
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 5
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 648(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 64
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 6
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 688(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 128
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 7
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 632(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 256
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 8
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 616(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 512
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 9
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 656(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t3, s0, 31
+; RV64IM-NEXT: sd t3, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s8, s0, 32
+; RV64IM-NEXT: sd s8, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a7, s0, 35
+; RV64IM-NEXT: sd a7, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s3, s0, 36
+; RV64IM-NEXT: slli s4, s0, 37
+; RV64IM-NEXT: slli s5, s0, 38
+; RV64IM-NEXT: slli s6, s0, 39
+; RV64IM-NEXT: slli s7, s0, 42
+; RV64IM-NEXT: slli a1, s0, 43
+; RV64IM-NEXT: slli s10, s0, 44
+; RV64IM-NEXT: sd s10, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a3, s0, 45
+; RV64IM-NEXT: slli a4, s0, 46
+; RV64IM-NEXT: sd a4, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t0, s0, 47
+; RV64IM-NEXT: sd t0, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t1, s0, 50
+; RV64IM-NEXT: sd t1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t4, s0, 51
+; RV64IM-NEXT: sd t4, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a2, s0, 52
+; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a2, s0, 53
+; RV64IM-NEXT: sd a2, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s1, s0, 54
+; RV64IM-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s2, s0, 55
+; RV64IM-NEXT: sd s2, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a2, s0, 58
+; RV64IM-NEXT: sd a2, 832(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s9, s0, 59
+; RV64IM-NEXT: sd s9, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s11, s0, 60
+; RV64IM-NEXT: sd s11, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli ra, s0, 61
+; RV64IM-NEXT: slli s0, s0, 62
+; RV64IM-NEXT: sd s0, 848(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: li a2, -1
+; RV64IM-NEXT: slli a2, a2, 63
+; RV64IM-NEXT: sd a2, 840(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 1
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: sd a2, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 2
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 4
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 528(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 8
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 576(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 16
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 608(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 32
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 640(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 256
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 512
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 480(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 1024
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 544(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 2048
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 584(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 16384
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: lui a6, 32768
+; RV64IM-NEXT: and t2, t6, a6
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a6, 65536
+; RV64IM-NEXT: and t2, t6, a6
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 504(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a6, 131072
+; RV64IM-NEXT: and t2, t6, a6
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 568(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a6, 262144
+; RV64IM-NEXT: and t2, t6, a6
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 592(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t2, t6, t3
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 600(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t2, t6, s8
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 624(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t2, t6, a7
+; RV64IM-NEXT: mul t2, t5, t2
+; RV64IM-NEXT: and t3, t6, s3
+; RV64IM-NEXT: mul a6, t5, t3
+; RV64IM-NEXT: sd a6, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t3, t6, s4
+; RV64IM-NEXT: mul a6, t5, t3
+; RV64IM-NEXT: sd a6, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t3, t6, s5
+; RV64IM-NEXT: mul a6, t5, t3
+; RV64IM-NEXT: sd a6, 512(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t3, t6, s6
+; RV64IM-NEXT: mul a6, t5, t3
+; RV64IM-NEXT: sd a6, 560(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t3, t6, s7
+; RV64IM-NEXT: mul t3, t5, t3
+; RV64IM-NEXT: and s0, t6, a1
+; RV64IM-NEXT: mv s8, a1
+; RV64IM-NEXT: mul a6, t5, s0
+; RV64IM-NEXT: sd a6, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, t6, s10
+; RV64IM-NEXT: mul a6, t5, s0
+; RV64IM-NEXT: sd a6, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, t6, a3
+; RV64IM-NEXT: mv s10, a3
+; RV64IM-NEXT: mul a6, t5, s0
+; RV64IM-NEXT: sd a6, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, t6, a4
+; RV64IM-NEXT: mul a6, t5, s0
+; RV64IM-NEXT: sd a6, 536(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, t6, t0
+; RV64IM-NEXT: mul a6, t5, s0
+; RV64IM-NEXT: sd a6, 552(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, t6, t1
+; RV64IM-NEXT: mul s0, t5, s0
+; RV64IM-NEXT: and a6, t6, t4
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: sd a6, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld t1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, t6, t1
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: sd a6, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld t0, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, t6, t0
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: sd a6, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a6, t6, s1
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: sd a6, 496(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a6, t6, s2
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: sd a6, 520(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 832(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, t6, a1
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: and a7, t6, s9
+; RV64IM-NEXT: mul a1, t5, a7
; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s5, s0
-; RV64IM-NEXT: and a1, a6, t6
-; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s4, t6
-; RV64IM-NEXT: and a1, a6, t5
-; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s3, t5
-; RV64IM-NEXT: and a1, a6, t4
-; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s2, t4
-; RV64IM-NEXT: and a1, a6, t3
-; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s1, t3
-; RV64IM-NEXT: and a1, a6, t2
-; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv t3, t2
-; RV64IM-NEXT: and a1, a6, t1
-; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv t2, t1
-; RV64IM-NEXT: and a1, a6, t0
-; RV64IM-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv t1, t0
-; RV64IM-NEXT: and a1, a6, a7
-; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv t0, a7
-; RV64IM-NEXT: and a1, a6, a5
-; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, a4
-; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a6, a6, a2
-; RV64IM-NEXT: ld a1, 928(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 640(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 920(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 632(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 912(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 624(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 904(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 616(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 896(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 608(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 888(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t4, a3, a1
-; RV64IM-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 712(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 872(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a3, a1
-; RV64IM-NEXT: sd a7, 880(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 864(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t5, a3, a1
-; RV64IM-NEXT: ld a1, 856(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t6, a3, a1
+; RV64IM-NEXT: and a7, t6, s11
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: sd a7, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, t6, ra
+; RV64IM-NEXT: mv s9, ra
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: sd a7, 424(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 848(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 696(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, t6, a1
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: sd a7, 456(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a3, a1
-; RV64IM-NEXT: sd a7, 776(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 832(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a3, a1
-; RV64IM-NEXT: sd a7, 856(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 824(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s0, a3, a1
-; RV64IM-NEXT: ld a1, 816(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 576(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 808(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 680(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 800(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a3, a1
-; RV64IM-NEXT: sd a7, 752(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 792(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a3, a1
-; RV64IM-NEXT: sd a7, 832(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a7, a3, s10
-; RV64IM-NEXT: sd a7, 904(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a3, ra
-; RV64IM-NEXT: sd a1, 560(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 784(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 552(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 768(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 664(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 744(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 736(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 808(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 728(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 544(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 720(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 536(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 704(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 648(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 688(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 720(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 672(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 800(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 656(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 864(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 600(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 912(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 592(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 928(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 584(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 520(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 568(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 512(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 528(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 592(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 504(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 688(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 496(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 768(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 488(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 824(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 480(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 888(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 920(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 464(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 496(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 488(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 568(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 656(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 736(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 792(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 896(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 480(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, t6, a1
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: sd a7, 488(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi a4, a4, -1
+; RV64IM-NEXT: and a1, a4, t5
+; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t5, t5, 10
+; RV64IM-NEXT: andi a4, t6, 1024
+; RV64IM-NEXT: seqz a4, a4
+; RV64IM-NEXT: addi a4, a4, -1
+; RV64IM-NEXT: and a4, a4, t5
+; RV64IM-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a4, a1
+; RV64IM-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a3, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, a3
+; RV64IM-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 528(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: xor a1, a1, t2
+; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 600(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 704(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 760(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 816(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 872(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 464(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 456(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 504(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 584(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 672(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
+; RV64IM-NEXT: xor a1, a1, t3
+; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 744(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, s0
+; RV64IM-NEXT: sd a1, 744(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 728(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, a6
; RV64IM-NEXT: sd a1, 728(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 784(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a3, a6
-; RV64IM-NEXT: sd a1, 840(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 992(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s10, s11, a1
-; RV64IM-NEXT: lui a1, 1
-; RV64IM-NEXT: and ra, s11, a1
+; RV64IM-NEXT: andi a2, a5, 2
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 1
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 4
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 2
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 8
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 3
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 16
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 4
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 32
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 5
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 64
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 6
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 128
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 7
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 256
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 8
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 512
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 9
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a1, 2
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 992(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, a5, a1
; RV64IM-NEXT: lui a1, 4
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a1, 8
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a1, 16
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a1, 32
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 64
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 128
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 256
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 512
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a3, 256
+; RV64IM-NEXT: and a3, a5, a3
+; RV64IM-NEXT: lui s1, 512
+; RV64IM-NEXT: and s1, a5, s1
; RV64IM-NEXT: lui a1, 1024
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 2048
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 4096
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 8192
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t6, a5, a1
+; RV64IM-NEXT: lui a2, 2048
+; RV64IM-NEXT: and a2, a5, a2
; RV64IM-NEXT: lui a1, 16384
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a1, 32768
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a1, 65536
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a1, 131072
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill
; RV64IM-NEXT: lui a1, 262144
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1016(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1016(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1048(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1048(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1008(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1008(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1032(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1032(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1000(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1000(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1024(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1024(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1040(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1040(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1056(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1056(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1064(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1064(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1080(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1072(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1088(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, s9
+; RV64IM-NEXT: and t4, a5, a1
+; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and ra, a5, a1
+; RV64IM-NEXT: ld s0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s0, a5, s0
+; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s2, a5, a1
+; RV64IM-NEXT: and s3, a5, s3
+; RV64IM-NEXT: and s4, a5, s4
+; RV64IM-NEXT: and s5, a5, s5
+; RV64IM-NEXT: and s6, a5, s6
+; RV64IM-NEXT: and s7, a5, s7
+; RV64IM-NEXT: and s8, a5, s8
+; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s8, s11, s8
-; RV64IM-NEXT: and a1, s11, s7
-; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, s6
-; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, s5
+; RV64IM-NEXT: and s10, a5, s10
+; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s11, a5, a1
+; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, s4
-; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s3, s11, s3
-; RV64IM-NEXT: and a1, s11, s2
-; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s1, s11, s1
-; RV64IM-NEXT: and t3, s11, t3
-; RV64IM-NEXT: and a1, s11, t2
-; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a6, s11, t1
-; RV64IM-NEXT: and a1, s11, t0
-; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, a5
-; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, a4
-; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, a2
-; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, s11, 2
-; RV64IM-NEXT: andi a2, s11, 1
-; RV64IM-NEXT: andi a3, s11, 4
-; RV64IM-NEXT: andi a4, s11, 8
-; RV64IM-NEXT: andi a5, s11, 16
-; RV64IM-NEXT: andi a7, s11, 32
-; RV64IM-NEXT: andi t0, s11, 64
-; RV64IM-NEXT: andi t1, s11, 128
-; RV64IM-NEXT: andi t2, s11, 256
-; RV64IM-NEXT: andi s2, s11, 512
-; RV64IM-NEXT: andi s5, s11, 1024
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: mul a1, a0, a5
-; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, a7
-; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, t0
-; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, t1
-; RV64IM-NEXT: sd a1, 448(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, t2
-; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, s2
-; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, s5
-; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, s10
-; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, ra
-; RV64IM-NEXT: sd a1, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 992(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 992(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: and a1, a5, t1
; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: and a1, a5, t0
; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1072(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1016(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1088(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1048(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s10, a0, a1
-; RV64IM-NEXT: ld a1, 1008(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s9, a0, a1
-; RV64IM-NEXT: ld a1, 1032(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1000(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1024(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1040(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1024(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1056(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1056(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1064(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 832(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t3, a5, a1
+; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t0, a5, a1
+; RV64IM-NEXT: and t5, a5, s9
+; RV64IM-NEXT: ld a1, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t2, a5, a1
+; RV64IM-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, a5, a1
+; RV64IM-NEXT: andi a1, a5, 1
+; RV64IM-NEXT: seqz a4, a1
+; RV64IM-NEXT: mul t1, a0, a7
+; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1080(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s7, a0, a1
-; RV64IM-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s6, a0, a1
-; RV64IM-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 840(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a0, a3
+; RV64IM-NEXT: mul a1, a0, s1
+; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, t6
+; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, a2
+; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a0, a1
+; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1008(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1040(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1064(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, t4
+; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, ra
+; RV64IM-NEXT: sd a1, 832(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s0
+; RV64IM-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul t4, a0, s2
+; RV64IM-NEXT: mul ra, a0, s3
+; RV64IM-NEXT: mul a1, a0, s4
+; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s5
+; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s6
+; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul s7, a0, s7
+; RV64IM-NEXT: mul s9, a0, s8
; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s5, a0, a1
-; RV64IM-NEXT: mul s4, a0, s8
-; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s10
+; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s11
+; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a2, a0, a1
+; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s8, a0, a1
+; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, s3
-; RV64IM-NEXT: sd a1, 1016(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1048(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s3, a0, s1
-; RV64IM-NEXT: mul s2, a0, t3
-; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s8, a0, a1
-; RV64IM-NEXT: mul a1, a0, a6
-; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1000(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a0, a0, a1
-; RV64IM-NEXT: sd a0, 1032(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 640(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s1, 632(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s1, s1, a0
-; RV64IM-NEXT: ld a0, 624(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 616(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul s6, a0, t3
+; RV64IM-NEXT: mul s10, a0, t0
+; RV64IM-NEXT: mul t0, a0, t5
+; RV64IM-NEXT: sd t0, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul t0, a0, t2
+; RV64IM-NEXT: sd t0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: sd a6, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: addi a4, a4, -1
+; RV64IM-NEXT: and s2, a4, a0
+; RV64IM-NEXT: slli a0, a0, 10
+; RV64IM-NEXT: andi a5, a5, 1024
+; RV64IM-NEXT: seqz a5, a5
+; RV64IM-NEXT: addi a5, a5, -1
+; RV64IM-NEXT: and a0, a5, a0
+; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 768(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s4, a0, t1
+; RV64IM-NEXT: ld a0, 760(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, a0, a7
+; RV64IM-NEXT: ld s0, 752(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s0, a3
+; RV64IM-NEXT: ld a0, 736(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, a0, t4
+; RV64IM-NEXT: ld a0, 720(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s5, a0, s7
+; RV64IM-NEXT: ld a0, 712(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, a0, a2
+; RV64IM-NEXT: ld a0, 704(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s3, a0, a1
+; RV64IM-NEXT: ld a0, 696(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s7, a1, a0
+; RV64IM-NEXT: ld a0, 680(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 672(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t2, a0, a1
+; RV64IM-NEXT: ld a0, 664(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 648(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t3, a0, a1
-; RV64IM-NEXT: ld a0, 608(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, a0, t4
-; RV64IM-NEXT: xor t5, t5, t6
-; RV64IM-NEXT: ld a0, 576(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, s0, a0
-; RV64IM-NEXT: ld a0, 560(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s0, 552(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, a0, s0
-; RV64IM-NEXT: ld a0, 544(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a5, 536(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a0, a5
-; RV64IM-NEXT: ld a0, 520(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 512(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a0, a1
-; RV64IM-NEXT: ld a0, 496(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 488(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a0, a1
+; RV64IM-NEXT: ld a0, 632(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 616(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, a0, a1
+; RV64IM-NEXT: ld a0, 528(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a3, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, a0
; RV64IM-NEXT: ld a0, 480(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, a0, a1
-; RV64IM-NEXT: ld a0, 464(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, a0, a1
-; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, a2, a0
-; RV64IM-NEXT: xor a0, a3, a4
-; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a4, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, a0
+; RV64IM-NEXT: ld a0, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a5, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, a0
+; RV64IM-NEXT: ld a0, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a1, a0
+; RV64IM-NEXT: ld a0, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a1, a0
+; RV64IM-NEXT: ld a0, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 744(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, a1, a0
+; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 728(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, a1, a0
+; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s2, s2, a0
+; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a1, a2
-; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a3, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, a3
-; RV64IM-NEXT: ld a3, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a4, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: ld a4, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld ra, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, ra
-; RV64IM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s11, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor ra, ra, s11
-; RV64IM-NEXT: xor s9, s10, s9
-; RV64IM-NEXT: xor s6, s7, s6
+; RV64IM-NEXT: ld a2, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, s11
+; RV64IM-NEXT: ld s11, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s4, s4, s11
+; RV64IM-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, t6, s11
+; RV64IM-NEXT: ld s11, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s0, s11
+; RV64IM-NEXT: xor s1, s1, ra
+; RV64IM-NEXT: xor s5, s5, s9
+; RV64IM-NEXT: xor t5, t5, s8
+; RV64IM-NEXT: xor s3, s3, s6
+; RV64IM-NEXT: xor t2, s7, t2
+; RV64IM-NEXT: ld s6, 688(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, t3, s6
+; RV64IM-NEXT: ld s6, 656(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t4, s6
+; RV64IM-NEXT: ld s6, 576(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, s6
+; RV64IM-NEXT: ld s6, 544(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, s6
+; RV64IM-NEXT: ld s6, 504(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, s6
+; RV64IM-NEXT: ld s6, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, s6
+; RV64IM-NEXT: ld s6, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, s6
+; RV64IM-NEXT: ld s6, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, s6
+; RV64IM-NEXT: ld s6, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, s6
+; RV64IM-NEXT: xor a0, s2, a0
+; RV64IM-NEXT: ld s2, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, s2
+; RV64IM-NEXT: ld s2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, s2
+; RV64IM-NEXT: ld s2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s2, s4, s2
+; RV64IM-NEXT: ld s4, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, t6, s4
+; RV64IM-NEXT: ld s4, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s0, s4
+; RV64IM-NEXT: ld s4, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, s1, s4
+; RV64IM-NEXT: ld s4, 88(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor s4, s5, s4
-; RV64IM-NEXT: xor s2, s3, s2
-; RV64IM-NEXT: xor t3, s1, t3
-; RV64IM-NEXT: ld s1, 712(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, t4, s1
-; RV64IM-NEXT: ld s1, 696(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t5, s1
-; RV64IM-NEXT: ld s1, 680(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, t6, s1
-; RV64IM-NEXT: ld s1, 664(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s0, s1
-; RV64IM-NEXT: ld s1, 648(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, s1
-; RV64IM-NEXT: ld s1, 592(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, s1
-; RV64IM-NEXT: ld s1, 568(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, s1
-; RV64IM-NEXT: ld s1, 528(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t0, s1
-; RV64IM-NEXT: ld s1, 504(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t1, s1
-; RV64IM-NEXT: xor a0, t2, a0
-; RV64IM-NEXT: ld t2, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, a1, t2
-; RV64IM-NEXT: ld t2, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, t2
-; RV64IM-NEXT: ld t2, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, t2
-; RV64IM-NEXT: ld t2, 128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, t2
-; RV64IM-NEXT: ld t2, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, ra, t2
-; RV64IM-NEXT: ld s1, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s1, s9, s1
-; RV64IM-NEXT: ld s3, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s3, s6, s3
-; RV64IM-NEXT: ld s5, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s4, s4, s5
-; RV64IM-NEXT: xor s2, s2, s8
-; RV64IM-NEXT: xor t3, t3, t4
-; RV64IM-NEXT: ld t4, 776(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, t5, t4
-; RV64IM-NEXT: ld t5, 752(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t6, t5
-; RV64IM-NEXT: ld t6, 744(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, s0, t6
-; RV64IM-NEXT: ld s0, 720(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, s0
-; RV64IM-NEXT: ld s0, 688(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, s0
-; RV64IM-NEXT: ld s0, 656(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, s0
-; RV64IM-NEXT: ld s0, 600(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t0, s0
-; RV64IM-NEXT: ld s0, 584(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t1, s0
-; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, a1
-; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, a1
-; RV64IM-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, a1
-; RV64IM-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, a1
-; RV64IM-NEXT: ld s0, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s1, s0
-; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s1, s3, a1
-; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s3, s4, a1
-; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s2, a1
-; RV64IM-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, t3, a1
-; RV64IM-NEXT: ld t3, 856(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t5, s5
+; RV64IM-NEXT: xor s3, s3, s10
+; RV64IM-NEXT: xor t2, t2, t3
+; RV64IM-NEXT: ld t3, 384(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t3, t4, t3
-; RV64IM-NEXT: ld t4, 832(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, t5, t4
-; RV64IM-NEXT: ld t5, 808(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t6, t5
-; RV64IM-NEXT: ld t6, 800(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, t6
-; RV64IM-NEXT: ld t6, 768(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, t6
-; RV64IM-NEXT: ld t6, 736(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t6
-; RV64IM-NEXT: ld t6, 704(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t0, t6
-; RV64IM-NEXT: ld t6, 672(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t1, t6
-; RV64IM-NEXT: ld t6, 448(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a0, t6
-; RV64IM-NEXT: ld t6, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, t6
-; RV64IM-NEXT: ld t6, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, t6
-; RV64IM-NEXT: ld t6, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, t6
-; RV64IM-NEXT: ld t6, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, t6
-; RV64IM-NEXT: ld t6, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, s0, t6
-; RV64IM-NEXT: ld s0, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s1, s0
-; RV64IM-NEXT: ld s1, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s1, s3, s1
-; RV64IM-NEXT: ld s3, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s2, s3
-; RV64IM-NEXT: ld s3, 904(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, t4, s3
-; RV64IM-NEXT: ld s3, 864(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, s3
-; RV64IM-NEXT: ld s3, 824(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, s3
-; RV64IM-NEXT: ld s3, 792(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, s3
-; RV64IM-NEXT: ld s3, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t0, s3
-; RV64IM-NEXT: ld s3, 728(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t1, s3
-; RV64IM-NEXT: ld s3, 992(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, s3
-; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, s3
-; RV64IM-NEXT: ld s3, 1024(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, t6, s3
-; RV64IM-NEXT: ld s3, 1008(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s0, s3
-; RV64IM-NEXT: ld s3, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s1, s1, s3
-; RV64IM-NEXT: ld s3, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s2, s3
-; RV64IM-NEXT: xor t3, a1, t3
-; RV64IM-NEXT: xor t3, t3, t4
-; RV64IM-NEXT: ld t4, 912(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t4, 608(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t4
+; RV64IM-NEXT: ld t4, 584(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, t4
+; RV64IM-NEXT: ld t4, 568(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a5, a5, t4
-; RV64IM-NEXT: ld t4, 888(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t4, 512(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a6, a6, t4
-; RV64IM-NEXT: ld t4, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t4, 472(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a7, a7, t4
-; RV64IM-NEXT: ld t4, 816(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t4, 440(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t0, t0, t4
-; RV64IM-NEXT: ld t4, 784(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t4, 424(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t1, t1, t4
-; RV64IM-NEXT: xor a2, a0, a2
-; RV64IM-NEXT: xor a2, a2, a3
-; RV64IM-NEXT: ld a3, 1072(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, t2, a3
-; RV64IM-NEXT: ld t2, 1056(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t6, t2
-; RV64IM-NEXT: ld t4, 1040(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, s0, t4
-; RV64IM-NEXT: ld t6, 1016(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, s2, a2
+; RV64IM-NEXT: ld t4, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t6, t4
+; RV64IM-NEXT: ld t6, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, s0, t6
+; RV64IM-NEXT: ld s0, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s1, s0
+; RV64IM-NEXT: ld s1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, s4, s1
+; RV64IM-NEXT: ld s2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t5, s2
+; RV64IM-NEXT: ld s2, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s2, s3, s2
+; RV64IM-NEXT: xor t2, t2, t3
+; RV64IM-NEXT: ld t3, 640(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t3
+; RV64IM-NEXT: ld t3, 592(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, t3
+; RV64IM-NEXT: ld t3, 560(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, t3
+; RV64IM-NEXT: ld t3, 536(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, t3
+; RV64IM-NEXT: ld t3, 496(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, t3
+; RV64IM-NEXT: ld t3, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, t3
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t6, a2
+; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, s0, t3
+; RV64IM-NEXT: ld t6, 192(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t6, s1, t6
-; RV64IM-NEXT: ld s0, 1000(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s0, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t5, s0
+; RV64IM-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor s0, s2, s0
-; RV64IM-NEXT: xor t3, t3, t5
-; RV64IM-NEXT: ld t5, 928(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, t5
-; RV64IM-NEXT: ld t5, 920(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, t5
-; RV64IM-NEXT: ld t5, 896(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t5
-; RV64IM-NEXT: ld t5, 872(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t0, t5
-; RV64IM-NEXT: ld t5, 840(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t1, t5
-; RV64IM-NEXT: xor a2, a2, a4
-; RV64IM-NEXT: ld a4, 1088(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: ld a4, 1080(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, t2, a4
-; RV64IM-NEXT: ld t2, 1064(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t4, t2
-; RV64IM-NEXT: ld t4, 1048(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, t6, t4
-; RV64IM-NEXT: ld t5, 1032(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, s0, t5
-; RV64IM-NEXT: xor a5, t3, a5
-; RV64IM-NEXT: xor a5, a5, a6
-; RV64IM-NEXT: xor a3, a2, a3
+; RV64IM-NEXT: ld s1, 600(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, s1
+; RV64IM-NEXT: ld s1, 552(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, s1
+; RV64IM-NEXT: ld s1, 520(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, s1
+; RV64IM-NEXT: ld s1, 488(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, s1
+; RV64IM-NEXT: ld s1, 832(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, s1
+; RV64IM-NEXT: ld s1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, t6, s1
+; RV64IM-NEXT: ld s1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t5, s1
+; RV64IM-NEXT: ld s1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s0, s1
+; RV64IM-NEXT: xor a3, t2, a3
; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: slli a1, a1, 56
+; RV64IM-NEXT: ld a4, 624(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a5, a4
+; RV64IM-NEXT: xor a1, a0, a1
+; RV64IM-NEXT: xor a1, a1, t4
+; RV64IM-NEXT: ld a5, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, a5
+; RV64IM-NEXT: xor a4, a3, a4
+; RV64IM-NEXT: xor a4, a4, a6
+; RV64IM-NEXT: xor a2, a1, a2
+; RV64IM-NEXT: xor a2, a2, t3
+; RV64IM-NEXT: slli t2, t2, 56
; RV64IM-NEXT: slli a0, a0, 56
-; RV64IM-NEXT: ld t6, 952(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a4, t3, t6
-; RV64IM-NEXT: and a2, a2, t6
-; RV64IM-NEXT: slli a4, a4, 40
-; RV64IM-NEXT: slli a2, a2, 40
-; RV64IM-NEXT: or a1, a1, a4
-; RV64IM-NEXT: srli a4, a5, 8
-; RV64IM-NEXT: or a0, a0, a2
-; RV64IM-NEXT: srli a2, a3, 8
-; RV64IM-NEXT: ld a6, 968(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a4, a4, a6
-; RV64IM-NEXT: and a2, a2, a6
-; RV64IM-NEXT: xor a6, a5, a7
+; RV64IM-NEXT: ld t4, 784(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a3, a3, t4
+; RV64IM-NEXT: and a1, a1, t4
+; RV64IM-NEXT: slli a3, a3, 40
+; RV64IM-NEXT: slli a1, a1, 40
+; RV64IM-NEXT: or a3, t2, a3
+; RV64IM-NEXT: srli a5, a4, 8
+; RV64IM-NEXT: or a0, a0, a1
+; RV64IM-NEXT: srli a1, a2, 8
+; RV64IM-NEXT: ld a6, 808(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a5, a5, a6
+; RV64IM-NEXT: and a1, a1, a6
+; RV64IM-NEXT: xor a6, a4, a7
; RV64IM-NEXT: lui t3, 4080
-; RV64IM-NEXT: and a5, a5, t3
-; RV64IM-NEXT: xor a7, a3, t2
-; RV64IM-NEXT: and a3, a3, t3
+; RV64IM-NEXT: and a4, a4, t3
+; RV64IM-NEXT: xor a7, a2, t6
+; RV64IM-NEXT: and a2, a2, t3
; RV64IM-NEXT: xor t0, a6, t0
; RV64IM-NEXT: srli a6, a6, 24
-; RV64IM-NEXT: xor t2, a7, t4
+; RV64IM-NEXT: xor t2, a7, t5
; RV64IM-NEXT: srli a7, a7, 24
; RV64IM-NEXT: and a6, a6, t3
; RV64IM-NEXT: and a7, a7, t3
-; RV64IM-NEXT: or a4, a4, a6
+; RV64IM-NEXT: or a5, a5, a6
; RV64IM-NEXT: srli a6, t0, 40
-; RV64IM-NEXT: or a2, a2, a7
+; RV64IM-NEXT: or a1, a1, a7
; RV64IM-NEXT: srli a7, t2, 40
-; RV64IM-NEXT: and a6, a6, t6
-; RV64IM-NEXT: and a7, a7, t6
-; RV64IM-NEXT: slli a5, a5, 24
+; RV64IM-NEXT: and a6, a6, t4
+; RV64IM-NEXT: and a7, a7, t4
+; RV64IM-NEXT: slli a4, a4, 24
; RV64IM-NEXT: xor t1, t0, t1
; RV64IM-NEXT: srliw t0, t0, 24
; RV64IM-NEXT: slli t0, t0, 32
-; RV64IM-NEXT: or a5, a5, t0
+; RV64IM-NEXT: or a4, a4, t0
; RV64IM-NEXT: srli t0, t1, 56
; RV64IM-NEXT: or a6, a6, t0
-; RV64IM-NEXT: slli a3, a3, 24
-; RV64IM-NEXT: xor t0, t2, t5
+; RV64IM-NEXT: slli a2, a2, 24
+; RV64IM-NEXT: xor t0, t2, s0
; RV64IM-NEXT: srliw t1, t2, 24
; RV64IM-NEXT: slli t1, t1, 32
-; RV64IM-NEXT: or a3, a3, t1
+; RV64IM-NEXT: or a2, a2, t1
; RV64IM-NEXT: srli t0, t0, 56
; RV64IM-NEXT: or a7, a7, t0
-; RV64IM-NEXT: or a1, a1, a5
-; RV64IM-NEXT: or a4, a4, a6
-; RV64IM-NEXT: or a0, a0, a3
-; RV64IM-NEXT: or a2, a2, a7
-; RV64IM-NEXT: or a1, a1, a4
+; RV64IM-NEXT: or a3, a3, a4
+; RV64IM-NEXT: or a4, a5, a6
; RV64IM-NEXT: or a0, a0, a2
-; RV64IM-NEXT: srli a2, a1, 4
-; RV64IM-NEXT: ld a4, 960(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a1, a4
+; RV64IM-NEXT: or a1, a1, a7
+; RV64IM-NEXT: or a3, a3, a4
+; RV64IM-NEXT: or a0, a0, a1
+; RV64IM-NEXT: srli a1, a3, 4
+; RV64IM-NEXT: ld a4, 800(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a2, a3, a4
; RV64IM-NEXT: srli a3, a0, 4
; RV64IM-NEXT: and a0, a0, a4
-; RV64IM-NEXT: and a2, a2, a4
+; RV64IM-NEXT: and a1, a1, a4
; RV64IM-NEXT: and a3, a3, a4
-; RV64IM-NEXT: slli a1, a1, 4
+; RV64IM-NEXT: slli a2, a2, 4
; RV64IM-NEXT: slli a0, a0, 4
-; RV64IM-NEXT: or a1, a2, a1
+; RV64IM-NEXT: or a1, a1, a2
; RV64IM-NEXT: or a0, a3, a0
; RV64IM-NEXT: srli a2, a1, 2
-; RV64IM-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a4, 792(sp) # 8-byte Folded Reload
; RV64IM-NEXT: and a1, a1, a4
; RV64IM-NEXT: srli a3, a0, 2
; RV64IM-NEXT: and a0, a0, a4
@@ -10366,7 +9370,7 @@ define void @commutative_clmulh_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %
; RV64IM-NEXT: slli a0, a0, 2
; RV64IM-NEXT: or a0, a3, a0
; RV64IM-NEXT: srli a3, a1, 1
-; RV64IM-NEXT: ld a4, 936(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a4, 776(sp) # 8-byte Folded Reload
; RV64IM-NEXT: and a1, a1, a4
; RV64IM-NEXT: and a4, a0, a4
; RV64IM-NEXT: srli a0, a0, 1
@@ -10378,3445 +9382,3468 @@ define void @commutative_clmulh_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %
; RV64IM-NEXT: or a0, a0, a4
; RV64IM-NEXT: srli a1, a1, 1
; RV64IM-NEXT: srli a0, a0, 1
-; RV64IM-NEXT: ld a2, 976(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 816(sp) # 8-byte Folded Reload
; RV64IM-NEXT: sd a1, 0(a2)
; RV64IM-NEXT: sd a0, 8(a2)
-; RV64IM-NEXT: ld a2, 984(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 824(sp) # 8-byte Folded Reload
; RV64IM-NEXT: sd a1, 0(a2)
; RV64IM-NEXT: sd a0, 8(a2)
-; RV64IM-NEXT: ld ra, 1192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s0, 1184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s1, 1176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s2, 1168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s3, 1160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s4, 1152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s5, 1144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s6, 1136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s7, 1128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s8, 1120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s9, 1112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s10, 1104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s11, 1096(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: addi sp, sp, 1200
+; RV64IM-NEXT: ld ra, 952(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s0, 944(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 936(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s2, 928(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s3, 920(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s4, 912(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s5, 904(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s6, 896(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s7, 888(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s8, 880(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s9, 872(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s10, 864(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 856(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 960
; RV64IM-NEXT: ret
;
; RV32IMZBS-LABEL: commutative_clmulh_v2i64:
; RV32IMZBS: # %bb.0:
-; RV32IMZBS-NEXT: addi sp, sp, -752
-; RV32IMZBS-NEXT: sw ra, 748(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s0, 744(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s1, 740(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s2, 736(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s3, 732(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s4, 728(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s5, 724(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s6, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s7, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s8, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s9, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s10, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s11, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw a2, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a4, 0(a0)
-; RV32IMZBS-NEXT: lw s5, 4(a0)
-; RV32IMZBS-NEXT: lw t1, 8(a0)
-; RV32IMZBS-NEXT: lw t5, 12(a0)
-; RV32IMZBS-NEXT: lw a7, 0(a1)
-; RV32IMZBS-NEXT: lw s1, 4(a1)
-; RV32IMZBS-NEXT: lw t3, 8(a1)
-; RV32IMZBS-NEXT: lw t6, 12(a1)
+; RV32IMZBS-NEXT: addi sp, sp, -800
+; RV32IMZBS-NEXT: sw ra, 796(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s0, 792(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s1, 788(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s2, 784(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s3, 780(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s4, 776(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s5, 772(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s6, 768(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s7, 764(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s8, 760(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s9, 756(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s10, 752(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s11, 748(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a3, 736(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a2, 732(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 0(a0)
+; RV32IMZBS-NEXT: lw a4, 4(a0)
+; RV32IMZBS-NEXT: lw a7, 8(a0)
+; RV32IMZBS-NEXT: lw t1, 12(a0)
+; RV32IMZBS-NEXT: lw s4, 0(a1)
+; RV32IMZBS-NEXT: lw t6, 4(a1)
+; RV32IMZBS-NEXT: lw t0, 8(a1)
+; RV32IMZBS-NEXT: lw t2, 12(a1)
; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: lui t2, 61681
-; RV32IMZBS-NEXT: lui t0, 209715
-; RV32IMZBS-NEXT: lui s7, 349525
-; RV32IMZBS-NEXT: bseti s8, zero, 11
-; RV32IMZBS-NEXT: addi s9, a0, -256
-; RV32IMZBS-NEXT: srli a0, s1, 8
-; RV32IMZBS-NEXT: srli s0, s1, 24
-; RV32IMZBS-NEXT: and a1, s1, s9
-; RV32IMZBS-NEXT: slli s1, s1, 24
-; RV32IMZBS-NEXT: srli a2, s5, 8
-; RV32IMZBS-NEXT: srli t4, s5, 24
-; RV32IMZBS-NEXT: and s3, s5, s9
-; RV32IMZBS-NEXT: slli s5, s5, 24
-; RV32IMZBS-NEXT: srli a5, a7, 8
-; RV32IMZBS-NEXT: srli a3, a7, 24
-; RV32IMZBS-NEXT: and a6, a7, s9
+; RV32IMZBS-NEXT: lui a1, 61681
+; RV32IMZBS-NEXT: lui a2, 209715
+; RV32IMZBS-NEXT: lui a3, 349525
+; RV32IMZBS-NEXT: addi s10, a0, -256
+; RV32IMZBS-NEXT: addi s9, a1, -241
+; RV32IMZBS-NEXT: addi s8, a2, 819
+; RV32IMZBS-NEXT: addi s6, a3, 1365
+; RV32IMZBS-NEXT: srli s0, t6, 8
+; RV32IMZBS-NEXT: srli s1, t6, 24
+; RV32IMZBS-NEXT: and t5, t6, s10
+; RV32IMZBS-NEXT: slli t6, t6, 24
+; RV32IMZBS-NEXT: srli a3, a4, 8
+; RV32IMZBS-NEXT: srli a5, a4, 24
+; RV32IMZBS-NEXT: and t3, a4, s10
+; RV32IMZBS-NEXT: slli a4, a4, 24
+; RV32IMZBS-NEXT: srli a2, s4, 8
+; RV32IMZBS-NEXT: srli s2, s4, 24
+; RV32IMZBS-NEXT: and a0, s4, s10
+; RV32IMZBS-NEXT: slli a1, s4, 24
+; RV32IMZBS-NEXT: srli s4, s7, 8
+; RV32IMZBS-NEXT: srli t4, s7, 24
+; RV32IMZBS-NEXT: and a6, s7, s10
+; RV32IMZBS-NEXT: slli s7, s7, 24
+; RV32IMZBS-NEXT: srli s3, t2, 8
+; RV32IMZBS-NEXT: and s0, s0, s10
+; RV32IMZBS-NEXT: or s0, s0, s1
+; RV32IMZBS-NEXT: srli s5, t2, 24
+; RV32IMZBS-NEXT: slli t5, t5, 8
+; RV32IMZBS-NEXT: or t5, t6, t5
+; RV32IMZBS-NEXT: and t6, t2, s10
+; RV32IMZBS-NEXT: slli s1, t2, 24
+; RV32IMZBS-NEXT: and a3, a3, s10
+; RV32IMZBS-NEXT: or t2, a3, a5
+; RV32IMZBS-NEXT: srli a3, t1, 8
+; RV32IMZBS-NEXT: slli t3, t3, 8
+; RV32IMZBS-NEXT: or t3, a4, t3
+; RV32IMZBS-NEXT: srli a5, t1, 24
+; RV32IMZBS-NEXT: and a2, a2, s10
+; RV32IMZBS-NEXT: or a2, a2, s2
+; RV32IMZBS-NEXT: and a4, t1, s10
+; RV32IMZBS-NEXT: slli t1, t1, 24
+; RV32IMZBS-NEXT: slli a0, a0, 8
+; RV32IMZBS-NEXT: or a0, a1, a0
+; RV32IMZBS-NEXT: srli s2, t0, 8
+; RV32IMZBS-NEXT: and a1, s4, s10
+; RV32IMZBS-NEXT: or a1, a1, t4
+; RV32IMZBS-NEXT: srli t4, t0, 24
+; RV32IMZBS-NEXT: slli a6, a6, 8
+; RV32IMZBS-NEXT: or s4, s7, a6
+; RV32IMZBS-NEXT: and s7, t0, s10
+; RV32IMZBS-NEXT: slli a6, t0, 24
+; RV32IMZBS-NEXT: and t0, s3, s10
+; RV32IMZBS-NEXT: or t0, t0, s5
+; RV32IMZBS-NEXT: srli s3, a7, 8
+; RV32IMZBS-NEXT: slli t6, t6, 8
+; RV32IMZBS-NEXT: or t6, s1, t6
+; RV32IMZBS-NEXT: srli s1, a7, 24
+; RV32IMZBS-NEXT: and a3, a3, s10
+; RV32IMZBS-NEXT: or a3, a3, a5
+; RV32IMZBS-NEXT: and a5, a7, s10
; RV32IMZBS-NEXT: slli a7, a7, 24
+; RV32IMZBS-NEXT: slli a4, a4, 8
+; RV32IMZBS-NEXT: and s2, s2, s10
+; RV32IMZBS-NEXT: slli s7, s7, 8
+; RV32IMZBS-NEXT: and s3, s3, s10
+; RV32IMZBS-NEXT: slli a5, a5, 8
+; RV32IMZBS-NEXT: or a4, t1, a4
+; RV32IMZBS-NEXT: or t1, s2, t4
+; RV32IMZBS-NEXT: or a6, a6, s7
+; RV32IMZBS-NEXT: or t4, s3, s1
+; RV32IMZBS-NEXT: or a5, a7, a5
+; RV32IMZBS-NEXT: or a7, t5, s0
+; RV32IMZBS-NEXT: or t2, t3, t2
+; RV32IMZBS-NEXT: or a0, a0, a2
+; RV32IMZBS-NEXT: or a1, s4, a1
+; RV32IMZBS-NEXT: or a2, t6, t0
+; RV32IMZBS-NEXT: or a3, a4, a3
+; RV32IMZBS-NEXT: or a4, a6, t1
+; RV32IMZBS-NEXT: or a5, a5, t4
+; RV32IMZBS-NEXT: srli a6, a7, 4
+; RV32IMZBS-NEXT: sw s9, 744(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a7, a7, s9
+; RV32IMZBS-NEXT: srli t0, t2, 4
+; RV32IMZBS-NEXT: and t1, t2, s9
+; RV32IMZBS-NEXT: srli t2, a0, 4
; RV32IMZBS-NEXT: and a0, a0, s9
-; RV32IMZBS-NEXT: or a0, a0, s0
-; RV32IMZBS-NEXT: srli s0, a4, 8
-; RV32IMZBS-NEXT: slli a1, a1, 8
-; RV32IMZBS-NEXT: or a1, s1, a1
-; RV32IMZBS-NEXT: srli s2, a4, 24
+; RV32IMZBS-NEXT: srli t3, a1, 4
+; RV32IMZBS-NEXT: and a1, a1, s9
+; RV32IMZBS-NEXT: srli t4, a2, 4
; RV32IMZBS-NEXT: and a2, a2, s9
-; RV32IMZBS-NEXT: or a2, a2, t4
-; RV32IMZBS-NEXT: and t4, a4, s9
-; RV32IMZBS-NEXT: slli s1, a4, 24
-; RV32IMZBS-NEXT: slli s3, s3, 8
-; RV32IMZBS-NEXT: or a4, s5, s3
-; RV32IMZBS-NEXT: srli s3, t6, 8
+; RV32IMZBS-NEXT: srli t5, a3, 4
+; RV32IMZBS-NEXT: and a3, a3, s9
+; RV32IMZBS-NEXT: srli t6, a4, 4
+; RV32IMZBS-NEXT: and a4, a4, s9
+; RV32IMZBS-NEXT: srli s0, a5, 4
; RV32IMZBS-NEXT: and a5, a5, s9
-; RV32IMZBS-NEXT: or a5, a5, a3
-; RV32IMZBS-NEXT: srli s5, t6, 24
-; RV32IMZBS-NEXT: slli a6, a6, 8
-; RV32IMZBS-NEXT: or a6, a7, a6
-; RV32IMZBS-NEXT: and s4, t6, s9
-; RV32IMZBS-NEXT: slli s6, t6, 24
-; RV32IMZBS-NEXT: and a3, s0, s9
-; RV32IMZBS-NEXT: or a3, a3, s2
-; RV32IMZBS-NEXT: srli t6, t5, 8
-; RV32IMZBS-NEXT: slli t4, t4, 8
-; RV32IMZBS-NEXT: or a7, s1, t4
-; RV32IMZBS-NEXT: srli s1, t5, 24
-; RV32IMZBS-NEXT: and t4, s3, s9
-; RV32IMZBS-NEXT: or t4, t4, s5
-; RV32IMZBS-NEXT: and s0, t5, s9
-; RV32IMZBS-NEXT: slli s2, t5, 24
-; RV32IMZBS-NEXT: slli s4, s4, 8
-; RV32IMZBS-NEXT: or t5, s6, s4
-; RV32IMZBS-NEXT: srli s3, t3, 8
-; RV32IMZBS-NEXT: and t6, t6, s9
-; RV32IMZBS-NEXT: or s5, t6, s1
-; RV32IMZBS-NEXT: srli s4, t3, 24
-; RV32IMZBS-NEXT: slli s0, s0, 8
-; RV32IMZBS-NEXT: or s1, s2, s0
-; RV32IMZBS-NEXT: and s2, t3, s9
-; RV32IMZBS-NEXT: slli t3, t3, 24
-; RV32IMZBS-NEXT: and s0, s3, s9
-; RV32IMZBS-NEXT: or t6, s0, s4
-; RV32IMZBS-NEXT: srli s3, t1, 8
-; RV32IMZBS-NEXT: slli s2, s2, 8
-; RV32IMZBS-NEXT: or s2, t3, s2
-; RV32IMZBS-NEXT: srli t3, t1, 24
-; RV32IMZBS-NEXT: and s3, s3, s9
-; RV32IMZBS-NEXT: or s3, s3, t3
-; RV32IMZBS-NEXT: and t3, t1, s9
-; RV32IMZBS-NEXT: sw s9, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli t1, t1, 24
-; RV32IMZBS-NEXT: slli t3, t3, 8
-; RV32IMZBS-NEXT: or s4, t1, t3
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: or a2, a4, a2
-; RV32IMZBS-NEXT: or a1, a6, a5
-; RV32IMZBS-NEXT: or a3, a7, a3
-; RV32IMZBS-NEXT: or a4, t5, t4
-; RV32IMZBS-NEXT: addi t3, t2, -241
-; RV32IMZBS-NEXT: sw t3, 684(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: addi t2, t0, 819
-; RV32IMZBS-NEXT: or a5, s1, s5
-; RV32IMZBS-NEXT: addi t1, s7, 1365
-; RV32IMZBS-NEXT: or a6, s2, t6
-; RV32IMZBS-NEXT: or a7, s4, s3
-; RV32IMZBS-NEXT: srli t0, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, t3
-; RV32IMZBS-NEXT: srli t4, a2, 4
-; RV32IMZBS-NEXT: and a2, a2, t3
-; RV32IMZBS-NEXT: and t0, t0, t3
+; RV32IMZBS-NEXT: and a6, a6, s9
+; RV32IMZBS-NEXT: slli a7, a7, 4
+; RV32IMZBS-NEXT: and t0, t0, s9
+; RV32IMZBS-NEXT: slli t1, t1, 4
+; RV32IMZBS-NEXT: and t2, t2, s9
; RV32IMZBS-NEXT: slli a0, a0, 4
-; RV32IMZBS-NEXT: or a0, t0, a0
-; RV32IMZBS-NEXT: srli t0, a1, 4
-; RV32IMZBS-NEXT: and a1, a1, t3
-; RV32IMZBS-NEXT: and t4, t4, t3
-; RV32IMZBS-NEXT: slli a2, a2, 4
-; RV32IMZBS-NEXT: or a2, t4, a2
-; RV32IMZBS-NEXT: srli t4, a3, 4
-; RV32IMZBS-NEXT: and a3, a3, t3
-; RV32IMZBS-NEXT: and t0, t0, t3
+; RV32IMZBS-NEXT: and t3, t3, s9
; RV32IMZBS-NEXT: slli a1, a1, 4
-; RV32IMZBS-NEXT: or a1, t0, a1
-; RV32IMZBS-NEXT: srli t0, a4, 4
-; RV32IMZBS-NEXT: and a4, a4, t3
-; RV32IMZBS-NEXT: and t4, t4, t3
+; RV32IMZBS-NEXT: and t4, t4, s9
+; RV32IMZBS-NEXT: slli a2, a2, 4
+; RV32IMZBS-NEXT: and t5, t5, s9
; RV32IMZBS-NEXT: slli a3, a3, 4
-; RV32IMZBS-NEXT: or a3, t4, a3
-; RV32IMZBS-NEXT: srli t4, a5, 4
-; RV32IMZBS-NEXT: and a5, a5, t3
-; RV32IMZBS-NEXT: and t0, t0, t3
+; RV32IMZBS-NEXT: and t6, t6, s9
; RV32IMZBS-NEXT: slli a4, a4, 4
-; RV32IMZBS-NEXT: or a4, t0, a4
-; RV32IMZBS-NEXT: srli t0, a6, 4
-; RV32IMZBS-NEXT: and a6, a6, t3
-; RV32IMZBS-NEXT: and t4, t4, t3
+; RV32IMZBS-NEXT: and s0, s0, s9
; RV32IMZBS-NEXT: slli a5, a5, 4
-; RV32IMZBS-NEXT: or a5, t4, a5
-; RV32IMZBS-NEXT: srli t4, a7, 4
-; RV32IMZBS-NEXT: and a7, a7, t3
-; RV32IMZBS-NEXT: and t0, t0, t3
-; RV32IMZBS-NEXT: slli a6, a6, 4
-; RV32IMZBS-NEXT: and t4, t4, t3
-; RV32IMZBS-NEXT: slli a7, a7, 4
-; RV32IMZBS-NEXT: or a6, t0, a6
-; RV32IMZBS-NEXT: or a7, t4, a7
-; RV32IMZBS-NEXT: srli t0, a0, 2
-; RV32IMZBS-NEXT: sw t2, 688(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, a0, t2
+; RV32IMZBS-NEXT: or a6, a6, a7
+; RV32IMZBS-NEXT: or a7, t0, t1
+; RV32IMZBS-NEXT: or a0, t2, a0
+; RV32IMZBS-NEXT: or a1, t3, a1
+; RV32IMZBS-NEXT: or a2, t4, a2
+; RV32IMZBS-NEXT: or a3, t5, a3
+; RV32IMZBS-NEXT: or a4, t6, a4
+; RV32IMZBS-NEXT: or a5, s0, a5
+; RV32IMZBS-NEXT: srli t0, a6, 2
+; RV32IMZBS-NEXT: sw s8, 728(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a6, a6, s8
+; RV32IMZBS-NEXT: srli t1, a7, 2
+; RV32IMZBS-NEXT: and a7, a7, s8
+; RV32IMZBS-NEXT: srli t2, a0, 2
+; RV32IMZBS-NEXT: and a0, a0, s8
+; RV32IMZBS-NEXT: srli t3, a1, 2
+; RV32IMZBS-NEXT: and a1, a1, s8
; RV32IMZBS-NEXT: srli t4, a2, 2
-; RV32IMZBS-NEXT: and a2, a2, t2
-; RV32IMZBS-NEXT: and t0, t0, t2
+; RV32IMZBS-NEXT: and a2, a2, s8
+; RV32IMZBS-NEXT: srli t5, a3, 2
+; RV32IMZBS-NEXT: and a3, a3, s8
+; RV32IMZBS-NEXT: srli t6, a4, 2
+; RV32IMZBS-NEXT: and a4, a4, s8
+; RV32IMZBS-NEXT: srli s0, a5, 2
+; RV32IMZBS-NEXT: and a5, a5, s8
+; RV32IMZBS-NEXT: and t0, t0, s8
+; RV32IMZBS-NEXT: slli a6, a6, 2
+; RV32IMZBS-NEXT: and t1, t1, s8
+; RV32IMZBS-NEXT: slli a7, a7, 2
+; RV32IMZBS-NEXT: and t2, t2, s8
; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: or a0, t0, a0
-; RV32IMZBS-NEXT: srli t0, a1, 2
-; RV32IMZBS-NEXT: and a1, a1, t2
-; RV32IMZBS-NEXT: and t4, t4, t2
-; RV32IMZBS-NEXT: slli a2, a2, 2
-; RV32IMZBS-NEXT: or a2, t4, a2
-; RV32IMZBS-NEXT: srli t4, a3, 2
-; RV32IMZBS-NEXT: and a3, a3, t2
-; RV32IMZBS-NEXT: and t0, t0, t2
+; RV32IMZBS-NEXT: and t3, t3, s8
; RV32IMZBS-NEXT: slli a1, a1, 2
-; RV32IMZBS-NEXT: or a1, t0, a1
-; RV32IMZBS-NEXT: srli t0, a4, 2
-; RV32IMZBS-NEXT: and a4, a4, t2
-; RV32IMZBS-NEXT: and t4, t4, t2
+; RV32IMZBS-NEXT: and t4, t4, s8
+; RV32IMZBS-NEXT: slli a2, a2, 2
+; RV32IMZBS-NEXT: and t5, t5, s8
; RV32IMZBS-NEXT: slli a3, a3, 2
-; RV32IMZBS-NEXT: or a3, t4, a3
-; RV32IMZBS-NEXT: srli t4, a5, 2
-; RV32IMZBS-NEXT: and a5, a5, t2
-; RV32IMZBS-NEXT: and t0, t0, t2
+; RV32IMZBS-NEXT: and t6, t6, s8
; RV32IMZBS-NEXT: slli a4, a4, 2
-; RV32IMZBS-NEXT: or a4, t0, a4
-; RV32IMZBS-NEXT: srli t0, a6, 2
-; RV32IMZBS-NEXT: and a6, a6, t2
-; RV32IMZBS-NEXT: and t4, t4, t2
+; RV32IMZBS-NEXT: and s0, s0, s8
; RV32IMZBS-NEXT: slli a5, a5, 2
-; RV32IMZBS-NEXT: or t4, t4, a5
-; RV32IMZBS-NEXT: srli a5, a7, 2
-; RV32IMZBS-NEXT: and a7, a7, t2
-; RV32IMZBS-NEXT: and t0, t0, t2
-; RV32IMZBS-NEXT: slli a6, a6, 2
-; RV32IMZBS-NEXT: and a5, a5, t2
-; RV32IMZBS-NEXT: slli a7, a7, 2
-; RV32IMZBS-NEXT: or t0, t0, a6
-; RV32IMZBS-NEXT: or t5, a5, a7
-; RV32IMZBS-NEXT: srli a5, a0, 1
-; RV32IMZBS-NEXT: mv t2, t1
-; RV32IMZBS-NEXT: sw t1, 692(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, a0, t1
-; RV32IMZBS-NEXT: srli a7, a2, 1
-; RV32IMZBS-NEXT: and a2, a2, t1
-; RV32IMZBS-NEXT: and a5, a5, t1
-; RV32IMZBS-NEXT: slli a0, a0, 1
-; RV32IMZBS-NEXT: or a6, a5, a0
-; RV32IMZBS-NEXT: srli a0, a1, 1
-; RV32IMZBS-NEXT: and a1, a1, t1
-; RV32IMZBS-NEXT: and a5, a7, t1
-; RV32IMZBS-NEXT: slli a2, a2, 1
-; RV32IMZBS-NEXT: or a5, a5, a2
-; RV32IMZBS-NEXT: srli a2, a3, 1
-; RV32IMZBS-NEXT: and a7, a3, t1
-; RV32IMZBS-NEXT: and a0, a0, t1
-; RV32IMZBS-NEXT: slli a1, a1, 1
-; RV32IMZBS-NEXT: or a3, a0, a1
-; RV32IMZBS-NEXT: srli a0, a4, 1
-; RV32IMZBS-NEXT: and a1, a4, t1
-; RV32IMZBS-NEXT: and a2, a2, t1
+; RV32IMZBS-NEXT: or a6, t0, a6
+; RV32IMZBS-NEXT: or a7, t1, a7
+; RV32IMZBS-NEXT: or a0, t2, a0
+; RV32IMZBS-NEXT: or a1, t3, a1
+; RV32IMZBS-NEXT: or a2, t4, a2
+; RV32IMZBS-NEXT: or a3, t5, a3
+; RV32IMZBS-NEXT: or a4, t6, a4
+; RV32IMZBS-NEXT: or a5, s0, a5
+; RV32IMZBS-NEXT: srli t0, a6, 1
+; RV32IMZBS-NEXT: sw s6, 724(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a6, a6, s6
+; RV32IMZBS-NEXT: srli t1, a7, 1
+; RV32IMZBS-NEXT: and a7, a7, s6
+; RV32IMZBS-NEXT: srli t2, a0, 1
+; RV32IMZBS-NEXT: and a0, a0, s6
+; RV32IMZBS-NEXT: srli t3, a1, 1
+; RV32IMZBS-NEXT: and a1, a1, s6
+; RV32IMZBS-NEXT: srli t4, a2, 1
+; RV32IMZBS-NEXT: and a2, a2, s6
+; RV32IMZBS-NEXT: srli t5, a3, 1
+; RV32IMZBS-NEXT: and a3, a3, s6
+; RV32IMZBS-NEXT: srli t6, a4, 1
+; RV32IMZBS-NEXT: and a4, a4, s6
+; RV32IMZBS-NEXT: srli s0, a5, 1
+; RV32IMZBS-NEXT: and a5, a5, s6
+; RV32IMZBS-NEXT: and t0, t0, s6
+; RV32IMZBS-NEXT: slli a6, a6, 1
+; RV32IMZBS-NEXT: and t1, t1, s6
; RV32IMZBS-NEXT: slli a7, a7, 1
-; RV32IMZBS-NEXT: or a7, a2, a7
-; RV32IMZBS-NEXT: srli a2, t4, 1
-; RV32IMZBS-NEXT: and a4, t4, t1
-; RV32IMZBS-NEXT: and a0, a0, t1
-; RV32IMZBS-NEXT: slli a1, a1, 1
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: srli a1, t0, 1
-; RV32IMZBS-NEXT: and t0, t0, t1
-; RV32IMZBS-NEXT: and a2, a2, t1
+; RV32IMZBS-NEXT: and t2, t2, s6
+; RV32IMZBS-NEXT: slli s1, a0, 1
+; RV32IMZBS-NEXT: and t3, t3, s6
+; RV32IMZBS-NEXT: slli s2, a1, 1
+; RV32IMZBS-NEXT: and t4, t4, s6
+; RV32IMZBS-NEXT: slli s3, a2, 1
+; RV32IMZBS-NEXT: and s4, t5, s6
+; RV32IMZBS-NEXT: slli s5, a3, 1
+; RV32IMZBS-NEXT: and t6, t6, s6
; RV32IMZBS-NEXT: slli a4, a4, 1
-; RV32IMZBS-NEXT: or t1, a2, a4
-; RV32IMZBS-NEXT: srli a4, t5, 1
-; RV32IMZBS-NEXT: and t4, t5, t2
-; RV32IMZBS-NEXT: and a1, a1, t2
-; RV32IMZBS-NEXT: slli t0, t0, 1
-; RV32IMZBS-NEXT: and a4, a4, t2
-; RV32IMZBS-NEXT: slli t4, t4, 1
-; RV32IMZBS-NEXT: or a1, a1, t0
-; RV32IMZBS-NEXT: or a4, a4, t4
-; RV32IMZBS-NEXT: srli t0, a6, 8
-; RV32IMZBS-NEXT: srli t4, a6, 24
-; RV32IMZBS-NEXT: and t0, t0, s9
-; RV32IMZBS-NEXT: or a2, t0, t4
-; RV32IMZBS-NEXT: sw a2, 620(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s1, 1024
-; RV32IMZBS-NEXT: lui s6, 2048
-; RV32IMZBS-NEXT: lui s5, 4096
-; RV32IMZBS-NEXT: lui t2, 8192
-; RV32IMZBS-NEXT: lui t3, 16384
-; RV32IMZBS-NEXT: lui t4, 32768
-; RV32IMZBS-NEXT: lui t5, 65536
-; RV32IMZBS-NEXT: lui s9, 131072
-; RV32IMZBS-NEXT: lui s10, 262144
-; RV32IMZBS-NEXT: lui ra, 524288
-; RV32IMZBS-NEXT: andi a2, a7, 2
-; RV32IMZBS-NEXT: sw a2, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 1
-; RV32IMZBS-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 4
-; RV32IMZBS-NEXT: sw a2, 608(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 8
-; RV32IMZBS-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 16
-; RV32IMZBS-NEXT: sw a2, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 32
-; RV32IMZBS-NEXT: sw a2, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 64
-; RV32IMZBS-NEXT: sw a2, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 128
-; RV32IMZBS-NEXT: sw a2, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 256
-; RV32IMZBS-NEXT: sw a2, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 512
-; RV32IMZBS-NEXT: sw a2, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 1024
-; RV32IMZBS-NEXT: sw a2, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mv t6, s8
-; RV32IMZBS-NEXT: sw s8, 680(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a2, a7, s8
-; RV32IMZBS-NEXT: sw a2, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s2, 1
-; RV32IMZBS-NEXT: and a2, a7, s2
-; RV32IMZBS-NEXT: sw a2, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 2
-; RV32IMZBS-NEXT: and a2, a7, a2
-; RV32IMZBS-NEXT: sw a2, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s3, 4
-; RV32IMZBS-NEXT: and a2, a7, s3
-; RV32IMZBS-NEXT: sw a2, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s4, 8
-; RV32IMZBS-NEXT: and a2, a7, s4
-; RV32IMZBS-NEXT: sw a2, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 16
-; RV32IMZBS-NEXT: and t0, a7, a2
-; RV32IMZBS-NEXT: sw t0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 32
-; RV32IMZBS-NEXT: and t0, a7, t0
-; RV32IMZBS-NEXT: sw t0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s11, 64
-; RV32IMZBS-NEXT: and t0, a7, s11
-; RV32IMZBS-NEXT: sw t0, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 128
-; RV32IMZBS-NEXT: and s0, a7, t0
-; RV32IMZBS-NEXT: sw s0, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s0, 256
-; RV32IMZBS-NEXT: and s0, a7, s0
-; RV32IMZBS-NEXT: sw s0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s7, 512
-; RV32IMZBS-NEXT: and s0, a7, s7
-; RV32IMZBS-NEXT: sw s0, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s0, a7, s1
-; RV32IMZBS-NEXT: sw s0, 460(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s0, a7, s6
-; RV32IMZBS-NEXT: sw s0, 456(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s0, a7, s5
-; RV32IMZBS-NEXT: sw s0, 452(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t2, a7, t2
-; RV32IMZBS-NEXT: sw t2, 448(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t2, a7, t3
-; RV32IMZBS-NEXT: sw t2, 444(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t2, a7, t4
-; RV32IMZBS-NEXT: sw t2, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t2, a7, t5
-; RV32IMZBS-NEXT: sw t2, 436(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t2, a7, s9
-; RV32IMZBS-NEXT: sw t2, 432(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t2, a7, s10
-; RV32IMZBS-NEXT: sw t2, 428(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a7, a7, ra
-; RV32IMZBS-NEXT: sw a7, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s0, a5, 2
-; RV32IMZBS-NEXT: sw s0, 320(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s1, a5, 1
-; RV32IMZBS-NEXT: sw s1, 316(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s5, a5, 4
-; RV32IMZBS-NEXT: sw s5, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s7, a5, 8
-; RV32IMZBS-NEXT: sw s7, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s8, a5, 16
-; RV32IMZBS-NEXT: sw s8, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi ra, a5, 32
-; RV32IMZBS-NEXT: andi t5, a5, 64
-; RV32IMZBS-NEXT: sw t5, 292(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s9, a5, 128
-; RV32IMZBS-NEXT: andi t3, a5, 256
-; RV32IMZBS-NEXT: andi t4, a5, 512
-; RV32IMZBS-NEXT: sw t4, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t2, a5, 1024
-; RV32IMZBS-NEXT: and s10, a5, t6
-; RV32IMZBS-NEXT: and a7, a5, s2
-; RV32IMZBS-NEXT: sw a7, 264(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a7, 2
-; RV32IMZBS-NEXT: and s2, a5, a7
-; RV32IMZBS-NEXT: sw s2, 284(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s3, a5, s3
-; RV32IMZBS-NEXT: sw s3, 280(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s4, a5, s4
-; RV32IMZBS-NEXT: sw s4, 276(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a7, a5, a2
-; RV32IMZBS-NEXT: sw a7, 296(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 32
-; RV32IMZBS-NEXT: and s6, a5, a2
-; RV32IMZBS-NEXT: sw s6, 272(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s11, a5, s11
-; RV32IMZBS-NEXT: sw s11, 304(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a2, a5, t0
-; RV32IMZBS-NEXT: sw a2, 300(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 256
-; RV32IMZBS-NEXT: and t6, a5, t0
-; RV32IMZBS-NEXT: sw t6, 288(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 512
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 260(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 1024
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 256(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 2048
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 232(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 4096
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 228(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 8192
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 252(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 16384
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 248(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 32768
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 244(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 65536
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 240(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 131072
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 236(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 262144
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 224(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 524288
-; RV32IMZBS-NEXT: and t0, a5, t0
-; RV32IMZBS-NEXT: sw t0, 220(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s0
-; RV32IMZBS-NEXT: sw t0, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s1
-; RV32IMZBS-NEXT: sw t0, 412(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s5
-; RV32IMZBS-NEXT: sw t0, 676(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s7
-; RV32IMZBS-NEXT: sw t0, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s8
-; RV32IMZBS-NEXT: sw t0, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, ra
-; RV32IMZBS-NEXT: sw t0, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, t5
-; RV32IMZBS-NEXT: sw t0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s9
-; RV32IMZBS-NEXT: sw t0, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, t3
-; RV32IMZBS-NEXT: sw t0, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, t4
-; RV32IMZBS-NEXT: sw t0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, t2
-; RV32IMZBS-NEXT: sw t0, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s10
-; RV32IMZBS-NEXT: sw t0, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw t5, 264(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a3, t5
-; RV32IMZBS-NEXT: sw t0, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s2
-; RV32IMZBS-NEXT: sw t0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s3
-; RV32IMZBS-NEXT: sw t0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s4
-; RV32IMZBS-NEXT: sw t0, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, a7
-; RV32IMZBS-NEXT: sw t0, 416(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s6
-; RV32IMZBS-NEXT: sw t0, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, s11
-; RV32IMZBS-NEXT: sw t0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, a2
-; RV32IMZBS-NEXT: sw t0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, a3, t6
-; RV32IMZBS-NEXT: sw t0, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw t6, 260(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a3, t6
-; RV32IMZBS-NEXT: sw t0, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw t4, 256(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a3, t4
-; RV32IMZBS-NEXT: sw a2, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s6, 232(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a3, s6
-; RV32IMZBS-NEXT: sw a2, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s5, 228(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a3, s5
-; RV32IMZBS-NEXT: sw t0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s0, 252(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a3, s0
-; RV32IMZBS-NEXT: sw t0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s1, 248(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a3, s1
-; RV32IMZBS-NEXT: sw t0, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s2, 244(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a3, s2
-; RV32IMZBS-NEXT: sw t0, 628(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s3, 240(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a3, s3
-; RV32IMZBS-NEXT: sw t0, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s4, 236(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a3, s4
-; RV32IMZBS-NEXT: sw a2, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s7, 224(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a3, s7
-; RV32IMZBS-NEXT: sw a2, 380(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s8, 220(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a3, s8
-; RV32IMZBS-NEXT: sw a3, 492(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 624(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 376(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 616(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 372(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 608(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 368(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 364(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 360(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 356(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 352(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 348(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 344(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 340(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 336(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 332(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 464(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 616(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 460(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 328(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 456(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 324(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 452(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 460(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 448(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 444(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 440(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 436(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: sw a3, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 432(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 268(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 428(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a6, a2
-; RV32IMZBS-NEXT: lw a2, 424(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a6, a2
-; RV32IMZBS-NEXT: sw t0, 456(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 320(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 320(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 316(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 316(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 312(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s11, a6, a2
-; RV32IMZBS-NEXT: lw a2, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul ra, a6, ra
-; RV32IMZBS-NEXT: lw a2, 292(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a7, a6, a2
-; RV32IMZBS-NEXT: sw a7, 452(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a7, a6, s9
-; RV32IMZBS-NEXT: sw a7, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a7, a6, t3
-; RV32IMZBS-NEXT: lw a2, 408(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a6, a2
-; RV32IMZBS-NEXT: mul t2, a6, t2
-; RV32IMZBS-NEXT: sw t2, 448(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t2, a6, s10
-; RV32IMZBS-NEXT: sw t2, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t2, a6, t5
-; RV32IMZBS-NEXT: sw t2, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 284(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s10, a6, a2
-; RV32IMZBS-NEXT: lw a2, 280(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t5, a6, a2
-; RV32IMZBS-NEXT: lw a2, 276(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t2, a6, a2
-; RV32IMZBS-NEXT: sw t2, 444(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 296(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 272(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 304(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: sw a2, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 300(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a6, a2
-; RV32IMZBS-NEXT: lw t2, 288(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t2, a6, t2
-; RV32IMZBS-NEXT: mul t3, a6, t6
-; RV32IMZBS-NEXT: sw t3, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t3, a6, t4
-; RV32IMZBS-NEXT: sw t3, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t3, a6, s6
-; RV32IMZBS-NEXT: sw t3, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t3, a6, s5
-; RV32IMZBS-NEXT: mul t4, a6, s0
-; RV32IMZBS-NEXT: mul t6, a6, s1
-; RV32IMZBS-NEXT: sw t6, 436(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t6, a6, s2
-; RV32IMZBS-NEXT: sw t6, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t6, a6, s3
-; RV32IMZBS-NEXT: sw t6, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t6, a6, s4
-; RV32IMZBS-NEXT: sw t6, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t6, a6, s7
-; RV32IMZBS-NEXT: sw t6, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t6, a6, s8
-; RV32IMZBS-NEXT: sw t6, 608(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli s3, a6, 24
-; RV32IMZBS-NEXT: lw s9, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a6, a6, s9
-; RV32IMZBS-NEXT: slli a6, a6, 8
-; RV32IMZBS-NEXT: or a6, s3, a6
-; RV32IMZBS-NEXT: sw a6, 432(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli a6, a5, 8
-; RV32IMZBS-NEXT: and a6, a6, s9
-; RV32IMZBS-NEXT: srli s3, a5, 24
-; RV32IMZBS-NEXT: or a6, a6, s3
-; RV32IMZBS-NEXT: sw a6, 428(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli a6, a5, 24
-; RV32IMZBS-NEXT: and a5, a5, s9
-; RV32IMZBS-NEXT: slli a5, a5, 8
-; RV32IMZBS-NEXT: or a5, a6, a5
+; RV32IMZBS-NEXT: and s0, s0, s6
+; RV32IMZBS-NEXT: slli t5, a5, 1
+; RV32IMZBS-NEXT: or s7, t0, a6
+; RV32IMZBS-NEXT: or a0, t1, a7
+; RV32IMZBS-NEXT: or ra, t2, s1
+; RV32IMZBS-NEXT: srli a2, a7, 31
+; RV32IMZBS-NEXT: or a1, t3, s2
+; RV32IMZBS-NEXT: srli a3, s2, 31
+; RV32IMZBS-NEXT: or a7, t4, s3
+; RV32IMZBS-NEXT: or t1, s4, s5
+; RV32IMZBS-NEXT: or t0, t6, a4
+; RV32IMZBS-NEXT: srli t3, s5, 31
+; RV32IMZBS-NEXT: or t2, s0, t5
+; RV32IMZBS-NEXT: srli t5, t5, 31
+; RV32IMZBS-NEXT: srli a4, s7, 8
+; RV32IMZBS-NEXT: srli a5, s7, 24
+; RV32IMZBS-NEXT: srli a6, a0, 8
+; RV32IMZBS-NEXT: srli t4, a0, 24
+; RV32IMZBS-NEXT: slli t6, a0, 24
+; RV32IMZBS-NEXT: and s0, a0, s10
+; RV32IMZBS-NEXT: slli s1, ra, 31
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: slli s2, s7, 31
+; RV32IMZBS-NEXT: seqz a3, a3
+; RV32IMZBS-NEXT: srli s3, a7, 8
+; RV32IMZBS-NEXT: and a4, a4, s10
+; RV32IMZBS-NEXT: or a4, a4, a5
+; RV32IMZBS-NEXT: srli s4, a7, 24
+; RV32IMZBS-NEXT: and a5, a6, s10
+; RV32IMZBS-NEXT: or a5, a5, t4
+; RV32IMZBS-NEXT: srli t4, t1, 8
+; RV32IMZBS-NEXT: slli s0, s0, 8
+; RV32IMZBS-NEXT: or a6, t6, s0
+; RV32IMZBS-NEXT: srli t6, t1, 24
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and s1, a2, s1
+; RV32IMZBS-NEXT: sw s1, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a3, a3, s2
+; RV32IMZBS-NEXT: sw a3, 712(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, s2
+; RV32IMZBS-NEXT: sw a2, 720(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s0, t1, 24
+; RV32IMZBS-NEXT: and a2, s3, s10
+; RV32IMZBS-NEXT: or a3, a2, s4
+; RV32IMZBS-NEXT: and s1, t1, s10
+; RV32IMZBS-NEXT: and a2, t4, s10
+; RV32IMZBS-NEXT: or a2, a2, t6
+; RV32IMZBS-NEXT: slli t6, t0, 31
+; RV32IMZBS-NEXT: seqz s2, t3
+; RV32IMZBS-NEXT: slli s1, s1, 8
+; RV32IMZBS-NEXT: or t3, s0, s1
+; RV32IMZBS-NEXT: slli t4, a7, 31
+; RV32IMZBS-NEXT: seqz s0, t5
+; RV32IMZBS-NEXT: addi t5, s2, -1
+; RV32IMZBS-NEXT: addi s0, s0, -1
+; RV32IMZBS-NEXT: and t6, t5, t6
+; RV32IMZBS-NEXT: sw t6, 708(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and t6, s0, t4
+; RV32IMZBS-NEXT: sw t6, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and t4, t5, t4
+; RV32IMZBS-NEXT: sw t4, 700(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and t4, s7, s10
+; RV32IMZBS-NEXT: sw s10, 740(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t4, t4, 8
+; RV32IMZBS-NEXT: slli t5, s7, 24
+; RV32IMZBS-NEXT: or t4, t5, t4
+; RV32IMZBS-NEXT: or a4, t4, a4
+; RV32IMZBS-NEXT: sw a4, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or a4, a6, a5
+; RV32IMZBS-NEXT: sw a4, 696(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a0, 2
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: andi a5, a1, 2
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 1
+; RV32IMZBS-NEXT: and s5, a4, a6
+; RV32IMZBS-NEXT: slli a6, s7, 1
+; RV32IMZBS-NEXT: and s3, a5, a6
+; RV32IMZBS-NEXT: and s2, a4, a6
+; RV32IMZBS-NEXT: andi a4, a0, 4
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: andi a5, a1, 4
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 2
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 2
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and s4, a4, a6
+; RV32IMZBS-NEXT: andi a4, a0, 8
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: andi a5, a1, 8
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 3
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 3
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a0, 16
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: andi a5, a1, 16
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 4
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 612(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 4
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a0, 32
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: andi a5, a1, 32
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 5
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 576(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 5
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 584(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a0, 64
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: andi a5, a1, 64
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 6
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 6
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 668(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a0, 128
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: andi a5, a1, 128
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 7
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 7
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 560(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a0, 256
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: andi a5, a1, 256
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 8
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 8
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a0, 512
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: andi a5, a1, 512
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 9
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 9
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a0, 1024
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: andi a5, a1, 1024
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 10
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 10
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 684(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 688(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not t6, a0
+; RV32IMZBS-NEXT: bexti a4, t6, 11
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: not s0, a1
+; RV32IMZBS-NEXT: bexti a5, s0, 11
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 11
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 464(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 11
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 476(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 488(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 12
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 12
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 12
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 452(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 12
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 460(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 468(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 13
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 13
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 13
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 508(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 13
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 544(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 14
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 14
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 14
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 592(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 14
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 15
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 15
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 15
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 656(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 15
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 664(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 16
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 16
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 16
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 416(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 16
+; RV32IMZBS-NEXT: and a5, a5, a6
; RV32IMZBS-NEXT: sw a5, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 412(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a6, a5
-; RV32IMZBS-NEXT: sw a5, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 676(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 428(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 17
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 17
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 17
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 408(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 17
+; RV32IMZBS-NEXT: and a5, a5, a6
; RV32IMZBS-NEXT: sw a5, 412(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: sw a5, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: sw a5, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: sw a5, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 416(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: sw a5, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 392(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 388(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: sw a5, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 380(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: sw a5, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 376(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 372(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a6, a5
-; RV32IMZBS-NEXT: sw a5, 416(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 368(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 364(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 420(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 18
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 18
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 18
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 444(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 18
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 448(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 456(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 19
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 19
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 19
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 504(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 19
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 512(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 520(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 20
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 20
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 20
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 564(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 20
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 568(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 572(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 21
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 21
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 21
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 588(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 21
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 596(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 604(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 22
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 22
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 22
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 372(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 22
+; RV32IMZBS-NEXT: and a5, a5, a6
; RV32IMZBS-NEXT: sw a5, 380(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 360(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 356(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: sw a5, 376(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 352(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 348(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: sw a5, 372(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 344(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 340(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 384(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 23
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 23
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 23
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 364(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 23
+; RV32IMZBS-NEXT: and a5, a5, a6
; RV32IMZBS-NEXT: sw a5, 368(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 336(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 332(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: sw a5, 364(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 328(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 324(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 376(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 24
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 24
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 24
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 396(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a5, a5, t5
+; RV32IMZBS-NEXT: sw a5, 400(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, t5
+; RV32IMZBS-NEXT: sw a4, 404(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 25
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 25
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 25
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 432(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 25
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 436(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 440(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 26
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 26
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 26
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 472(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 26
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 480(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 484(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 27
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 27
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 27
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 492(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 27
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 496(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 500(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 28
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 28
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 28
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 532(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 28
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: sw a5, 528(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a6
+; RV32IMZBS-NEXT: sw a4, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a4, t6, 29
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: bexti a5, s0, 29
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, ra, 29
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: sw a6, 356(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s7, 29
+; RV32IMZBS-NEXT: and a5, a5, a6
; RV32IMZBS-NEXT: sw a5, 360(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 268(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a5, a3
-; RV32IMZBS-NEXT: sw a3, 356(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a3, 320(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s0, 316(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, s0, a3
-; RV32IMZBS-NEXT: sw s0, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a3, 312(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, s11
-; RV32IMZBS-NEXT: sw a3, 352(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a3, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, ra
-; RV32IMZBS-NEXT: sw a3, 348(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a3, a7, t0
-; RV32IMZBS-NEXT: sw a3, 344(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a3, s10, t5
-; RV32IMZBS-NEXT: sw a3, 340(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a2, a2, t2
-; RV32IMZBS-NEXT: sw a2, 336(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a2, t3, t4
-; RV32IMZBS-NEXT: sw a2, 332(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli a3, a0, 8
-; RV32IMZBS-NEXT: and a3, a3, s9
-; RV32IMZBS-NEXT: srli a5, a0, 24
-; RV32IMZBS-NEXT: or a3, a3, a5
-; RV32IMZBS-NEXT: sw a3, 328(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 2
-; RV32IMZBS-NEXT: sw a2, 312(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 1
-; RV32IMZBS-NEXT: sw a2, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 4
-; RV32IMZBS-NEXT: sw a2, 304(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 8
-; RV32IMZBS-NEXT: sw a2, 288(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 16
-; RV32IMZBS-NEXT: sw a2, 284(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 32
-; RV32IMZBS-NEXT: sw a2, 276(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 64
-; RV32IMZBS-NEXT: sw a2, 272(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 128
+; RV32IMZBS-NEXT: and a6, a4, a6
+; RV32IMZBS-NEXT: andi a0, a0, 1
+; RV32IMZBS-NEXT: andi a1, a1, 1
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, ra, 30
+; RV32IMZBS-NEXT: and a5, a0, ra
+; RV32IMZBS-NEXT: sw a5, 320(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, s7
+; RV32IMZBS-NEXT: sw a1, 344(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and ra, a0, s7
+; RV32IMZBS-NEXT: slli s7, s7, 30
+; RV32IMZBS-NEXT: bexti a0, t6, 30
+; RV32IMZBS-NEXT: bexti a1, s0, 30
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a4, a0, a4
+; RV32IMZBS-NEXT: sw a4, 312(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, s7
+; RV32IMZBS-NEXT: sw a1, 340(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and s7, a0, s7
+; RV32IMZBS-NEXT: and a0, a7, s10
+; RV32IMZBS-NEXT: slli a0, a0, 8
+; RV32IMZBS-NEXT: slli s1, a7, 24
+; RV32IMZBS-NEXT: or a0, s1, a0
+; RV32IMZBS-NEXT: or a0, a0, a3
+; RV32IMZBS-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or a0, t3, a2
+; RV32IMZBS-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t2, 2
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 1
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 316(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 1
+; RV32IMZBS-NEXT: and t3, a1, a2
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 4
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t2, 4
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 2
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 292(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 2
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 8
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t2, 8
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 3
+; RV32IMZBS-NEXT: and a2, a0, a2
; RV32IMZBS-NEXT: sw a2, 268(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 256
-; RV32IMZBS-NEXT: sw a2, 264(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 512
-; RV32IMZBS-NEXT: sw a2, 260(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a4, 1024
+; RV32IMZBS-NEXT: slli a2, a7, 3
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 284(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 16
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t2, 16
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 4
+; RV32IMZBS-NEXT: and a2, a0, a2
; RV32IMZBS-NEXT: sw a2, 248(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s9, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a2, a4, s9
-; RV32IMZBS-NEXT: sw a2, 244(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a5, 1
-; RV32IMZBS-NEXT: and a2, a4, a5
-; RV32IMZBS-NEXT: sw a2, 240(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a6, 2
-; RV32IMZBS-NEXT: and a2, a4, a6
-; RV32IMZBS-NEXT: sw a2, 212(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t3, 4
-; RV32IMZBS-NEXT: and a2, a4, t3
-; RV32IMZBS-NEXT: sw a2, 156(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t4, 8
-; RV32IMZBS-NEXT: and a2, a4, t4
-; RV32IMZBS-NEXT: sw a2, 144(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 16
-; RV32IMZBS-NEXT: and a2, a4, a2
-; RV32IMZBS-NEXT: sw a2, 92(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 32
-; RV32IMZBS-NEXT: and a2, a4, a2
-; RV32IMZBS-NEXT: sw a2, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 64
-; RV32IMZBS-NEXT: and a2, a4, a2
-; RV32IMZBS-NEXT: sw a2, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 128
-; RV32IMZBS-NEXT: and a2, a4, a2
-; RV32IMZBS-NEXT: sw a2, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 256
-; RV32IMZBS-NEXT: and a2, a4, a2
-; RV32IMZBS-NEXT: sw a2, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s11, 512
-; RV32IMZBS-NEXT: and a2, a4, s11
-; RV32IMZBS-NEXT: sw a2, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s10, 1024
-; RV32IMZBS-NEXT: and a2, a4, s10
-; RV32IMZBS-NEXT: sw a2, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s8, 2048
-; RV32IMZBS-NEXT: and a2, a4, s8
-; RV32IMZBS-NEXT: sw a2, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s6, 4096
-; RV32IMZBS-NEXT: and a2, a4, s6
-; RV32IMZBS-NEXT: sw a2, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s7, 8192
-; RV32IMZBS-NEXT: and a2, a4, s7
-; RV32IMZBS-NEXT: sw a2, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t2, 16384
-; RV32IMZBS-NEXT: and a2, a4, t2
-; RV32IMZBS-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 32768
-; RV32IMZBS-NEXT: and a2, a4, t0
-; RV32IMZBS-NEXT: sw a2, 28(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a7, 65536
-; RV32IMZBS-NEXT: and a2, a4, a7
-; RV32IMZBS-NEXT: sw a2, 20(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui ra, 131072
-; RV32IMZBS-NEXT: and a2, a4, ra
-; RV32IMZBS-NEXT: sw a2, 8(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s5, 262144
-; RV32IMZBS-NEXT: and a2, a4, s5
-; RV32IMZBS-NEXT: sw a2, 4(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 524288
-; RV32IMZBS-NEXT: and a4, a4, a2
-; RV32IMZBS-NEXT: sw a4, 0(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, t1, 2
-; RV32IMZBS-NEXT: sw a3, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, t1, 1
-; RV32IMZBS-NEXT: sw a3, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, t1, 4
-; RV32IMZBS-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, t1, 8
-; RV32IMZBS-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, t1, 16
-; RV32IMZBS-NEXT: sw a3, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, t1, 32
-; RV32IMZBS-NEXT: sw a3, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, t1, 64
-; RV32IMZBS-NEXT: sw a3, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, t1, 128
-; RV32IMZBS-NEXT: sw a3, 676(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s2, t1, 256
-; RV32IMZBS-NEXT: sw s2, 12(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s3, t1, 512
-; RV32IMZBS-NEXT: andi s1, t1, 1024
-; RV32IMZBS-NEXT: sw s1, 16(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s4, t1, s9
-; RV32IMZBS-NEXT: and s0, t1, a5
-; RV32IMZBS-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t5, t1, a6
-; RV32IMZBS-NEXT: sw t5, 32(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t6, t1, t3
-; RV32IMZBS-NEXT: and t4, t1, t4
-; RV32IMZBS-NEXT: sw t4, 40(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 16
-; RV32IMZBS-NEXT: and t3, t1, a3
-; RV32IMZBS-NEXT: sw t3, 44(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 32
-; RV32IMZBS-NEXT: and a6, t1, a3
-; RV32IMZBS-NEXT: sw a6, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 64
-; RV32IMZBS-NEXT: and a5, t1, a3
-; RV32IMZBS-NEXT: sw a5, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a3, 128
-; RV32IMZBS-NEXT: and a3, t1, a3
-; RV32IMZBS-NEXT: lui a4, 256
-; RV32IMZBS-NEXT: and a4, t1, a4
-; RV32IMZBS-NEXT: and s11, t1, s11
-; RV32IMZBS-NEXT: and s10, t1, s10
-; RV32IMZBS-NEXT: and s8, t1, s8
-; RV32IMZBS-NEXT: and s6, t1, s6
-; RV32IMZBS-NEXT: and s7, t1, s7
-; RV32IMZBS-NEXT: and t2, t1, t2
-; RV32IMZBS-NEXT: and t0, t1, t0
-; RV32IMZBS-NEXT: and a7, t1, a7
-; RV32IMZBS-NEXT: and ra, t1, ra
-; RV32IMZBS-NEXT: and s9, t1, s5
-; RV32IMZBS-NEXT: and s5, t1, a2
-; RV32IMZBS-NEXT: lw a2, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a1, a2
-; RV32IMZBS-NEXT: sw a2, 236(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a1, a2
-; RV32IMZBS-NEXT: sw a2, 232(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a1, a2
-; RV32IMZBS-NEXT: sw a2, 228(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a1, a2
+; RV32IMZBS-NEXT: slli a2, a7, 4
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t2, 32
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 5
+; RV32IMZBS-NEXT: and a2, a0, a2
; RV32IMZBS-NEXT: sw a2, 224(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a1, a2
-; RV32IMZBS-NEXT: sw a2, 220(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a1, a2
-; RV32IMZBS-NEXT: sw a2, 216(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a1, a2
+; RV32IMZBS-NEXT: slli a2, a7, 5
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t2, 64
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 6
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 296(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 6
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 304(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 324(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t2, 128
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 7
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 184(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 7
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t2, 256
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 8
+; RV32IMZBS-NEXT: and a2, a0, a2
; RV32IMZBS-NEXT: sw a2, 168(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 676(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a1, a2
-; RV32IMZBS-NEXT: sw a2, 208(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, s2
+; RV32IMZBS-NEXT: slli a2, a7, 8
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 180(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 188(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t2, 512
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 9
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 228(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 9
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t2, 1024
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 10
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 332(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 10
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 336(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a1, t1
+; RV32IMZBS-NEXT: bexti a0, a1, 11
+; RV32IMZBS-NEXT: addi a2, a0, -1
+; RV32IMZBS-NEXT: not a0, t2
+; RV32IMZBS-NEXT: bexti a3, a0, 11
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 11
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 112(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 11
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 128(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 136(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 12
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 12
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 12
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 88(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 12
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 104(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 116(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 13
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 13
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 13
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 140(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 13
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 164(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 176(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 14
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 14
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 14
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 240(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 14
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 244(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 260(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 15
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 15
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 276(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 15
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 280(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 288(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 16
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 16
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 16
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 52(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 16
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 60(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 64(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 17
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 17
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli s0, t0, 17
+; RV32IMZBS-NEXT: and s0, a2, s0
+; RV32IMZBS-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 17
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 48(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 56(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 18
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 18
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 18
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 76(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 18
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 84(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 92(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 19
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 19
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 19
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 144(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 19
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 160(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 172(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 20
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 20
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 20
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 192(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 20
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 196(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
; RV32IMZBS-NEXT: sw a2, 204(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, s3
-; RV32IMZBS-NEXT: mv s2, s3
-; RV32IMZBS-NEXT: sw a2, 160(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, s1
-; RV32IMZBS-NEXT: sw a2, 184(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, s4
-; RV32IMZBS-NEXT: mv s3, s4
-; RV32IMZBS-NEXT: sw a2, 200(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, s0
-; RV32IMZBS-NEXT: sw a2, 196(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, t5
+; RV32IMZBS-NEXT: bexti a2, a1, 21
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 21
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 21
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 212(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 21
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 216(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 220(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 22
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 22
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 22
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 20(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, a7, 22
+; RV32IMZBS-NEXT: and a4, a3, a5
+; RV32IMZBS-NEXT: and a2, a2, a5
+; RV32IMZBS-NEXT: sw a2, 28(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 23
+; RV32IMZBS-NEXT: addi a5, a2, -1
+; RV32IMZBS-NEXT: bexti a2, a0, 23
+; RV32IMZBS-NEXT: addi a3, a2, -1
+; RV32IMZBS-NEXT: slli a2, t0, 23
+; RV32IMZBS-NEXT: and a2, a5, a2
+; RV32IMZBS-NEXT: sw a2, 16(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t4, a7, 23
+; RV32IMZBS-NEXT: and a3, a3, t4
+; RV32IMZBS-NEXT: and a2, a5, t4
+; RV32IMZBS-NEXT: sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti t4, a1, 24
+; RV32IMZBS-NEXT: addi t6, t4, -1
+; RV32IMZBS-NEXT: bexti t4, a0, 24
+; RV32IMZBS-NEXT: addi t5, t4, -1
+; RV32IMZBS-NEXT: slli t4, t0, 24
+; RV32IMZBS-NEXT: and a2, t6, t4
+; RV32IMZBS-NEXT: sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, t5, s1
+; RV32IMZBS-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, t6, s1
+; RV32IMZBS-NEXT: sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti s1, a1, 25
+; RV32IMZBS-NEXT: addi s8, s1, -1
+; RV32IMZBS-NEXT: bexti s1, a0, 25
+; RV32IMZBS-NEXT: addi s6, s1, -1
+; RV32IMZBS-NEXT: slli s1, t0, 25
+; RV32IMZBS-NEXT: and a2, s8, s1
+; RV32IMZBS-NEXT: sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s9, a7, 25
+; RV32IMZBS-NEXT: and a2, s6, s9
+; RV32IMZBS-NEXT: sw a2, 72(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, s8, s9
+; RV32IMZBS-NEXT: sw a2, 80(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti s8, a1, 26
+; RV32IMZBS-NEXT: addi s8, s8, -1
+; RV32IMZBS-NEXT: bexti s9, a0, 26
+; RV32IMZBS-NEXT: addi s9, s9, -1
+; RV32IMZBS-NEXT: slli s10, t0, 26
+; RV32IMZBS-NEXT: and a2, s8, s10
+; RV32IMZBS-NEXT: sw a2, 96(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s10, a7, 26
+; RV32IMZBS-NEXT: and a2, s9, s10
+; RV32IMZBS-NEXT: sw a2, 100(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, s8, s10
+; RV32IMZBS-NEXT: sw a2, 108(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti s8, a1, 27
+; RV32IMZBS-NEXT: addi s8, s8, -1
+; RV32IMZBS-NEXT: bexti s9, a0, 27
+; RV32IMZBS-NEXT: addi s9, s9, -1
+; RV32IMZBS-NEXT: slli s10, t0, 27
+; RV32IMZBS-NEXT: and a2, s8, s10
+; RV32IMZBS-NEXT: sw a2, 120(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s10, a7, 27
+; RV32IMZBS-NEXT: and a2, s9, s10
+; RV32IMZBS-NEXT: sw a2, 124(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, s8, s10
+; RV32IMZBS-NEXT: sw a2, 132(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti s8, a1, 28
+; RV32IMZBS-NEXT: addi s8, s8, -1
+; RV32IMZBS-NEXT: bexti s9, a0, 28
+; RV32IMZBS-NEXT: addi s9, s9, -1
+; RV32IMZBS-NEXT: slli s10, t0, 28
+; RV32IMZBS-NEXT: and a2, s8, s10
; RV32IMZBS-NEXT: sw a2, 152(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, t6
-; RV32IMZBS-NEXT: mv t5, t6
-; RV32IMZBS-NEXT: sw a2, 180(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, t4
-; RV32IMZBS-NEXT: sw a2, 300(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, t3
-; RV32IMZBS-NEXT: sw a2, 192(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, a6
-; RV32IMZBS-NEXT: sw a2, 188(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, a5
+; RV32IMZBS-NEXT: slli s10, a7, 28
+; RV32IMZBS-NEXT: and a2, s9, s10
; RV32IMZBS-NEXT: sw a2, 148(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, a3
-; RV32IMZBS-NEXT: sw a2, 176(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, a4
-; RV32IMZBS-NEXT: sw a2, 296(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, s11
-; RV32IMZBS-NEXT: sw a2, 320(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, s10
-; RV32IMZBS-NEXT: sw a2, 172(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, s8
-; RV32IMZBS-NEXT: sw a2, 164(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, s6
-; RV32IMZBS-NEXT: sw a2, 128(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, s7
-; RV32IMZBS-NEXT: sw a2, 280(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, t2
-; RV32IMZBS-NEXT: sw a2, 292(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, t0
-; RV32IMZBS-NEXT: sw a2, 316(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, a7
-; RV32IMZBS-NEXT: sw a2, 324(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a1, ra
-; RV32IMZBS-NEXT: sw a2, 140(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mv a2, ra
-; RV32IMZBS-NEXT: mul a6, a1, s9
-; RV32IMZBS-NEXT: sw a6, 136(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mv a6, s9
-; RV32IMZBS-NEXT: mul a1, a1, s5
-; RV32IMZBS-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mv t3, s5
-; RV32IMZBS-NEXT: lw a1, 312(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 288(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 284(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 276(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 272(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 256(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 268(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 264(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 252(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 272(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t4, a0, a1
-; RV32IMZBS-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t6, a0, a1
-; RV32IMZBS-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 156(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 288(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s0, a0, a1
-; RV32IMZBS-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s1, a0, a1
-; RV32IMZBS-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 284(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s4, a0, a1
-; RV32IMZBS-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s5, a0, a1
-; RV32IMZBS-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 276(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 304(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 312(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul ra, a0, a1
-; RV32IMZBS-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 0(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s9, a0, a1
-; RV32IMZBS-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, s2
-; RV32IMZBS-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s2, a0, a1
-; RV32IMZBS-NEXT: mul a1, a0, s3
-; RV32IMZBS-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, t5
-; RV32IMZBS-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t5, a0, a1
-; RV32IMZBS-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 676(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, a3
-; RV32IMZBS-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: mul s3, a0, s11
-; RV32IMZBS-NEXT: mul a1, a0, s10
-; RV32IMZBS-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, s8
-; RV32IMZBS-NEXT: sw a1, 156(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a5, a0, s6
-; RV32IMZBS-NEXT: mul a3, a0, s7
-; RV32IMZBS-NEXT: mul t2, a0, t2
-; RV32IMZBS-NEXT: mul a1, a0, t0
-; RV32IMZBS-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, a7
-; RV32IMZBS-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, a2
-; RV32IMZBS-NEXT: sw a1, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, a6
-; RV32IMZBS-NEXT: sw a1, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a0, t3
-; RV32IMZBS-NEXT: sw a1, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli a1, a0, 24
-; RV32IMZBS-NEXT: lw t3, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, a0, t3
-; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a6, a1, a0
-; RV32IMZBS-NEXT: srli a0, t1, 8
-; RV32IMZBS-NEXT: and a0, a0, t3
-; RV32IMZBS-NEXT: srli a2, t1, 24
-; RV32IMZBS-NEXT: or a7, a0, a2
-; RV32IMZBS-NEXT: slli a2, t1, 24
-; RV32IMZBS-NEXT: and t1, t1, t3
-; RV32IMZBS-NEXT: slli t1, t1, 8
-; RV32IMZBS-NEXT: or a2, a2, t1
-; RV32IMZBS-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, a1, a0
-; RV32IMZBS-NEXT: lw a0, 228(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, a0, a1
-; RV32IMZBS-NEXT: lw a0, 220(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, a0, a1
-; RV32IMZBS-NEXT: lw a0, 208(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, a0, a1
-; RV32IMZBS-NEXT: lw a0, 200(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, a0, a1
-; RV32IMZBS-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 188(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, a0, a1
-; RV32IMZBS-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 164(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, a0, a1
-; RV32IMZBS-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 232(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, a1, a0
-; RV32IMZBS-NEXT: lw a0, 120(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 228(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 224(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 220(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor t6, t4, t6
-; RV32IMZBS-NEXT: xor s0, s0, s1
-; RV32IMZBS-NEXT: xor s1, s4, s5
-; RV32IMZBS-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s4, ra, a0
-; RV32IMZBS-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, a1, a0
-; RV32IMZBS-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s5, a0, a1
-; RV32IMZBS-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor ra, a0, a1
-; RV32IMZBS-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 216(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a0, a4
-; RV32IMZBS-NEXT: xor a3, a5, a3
-; RV32IMZBS-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 432(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a0, a1
-; RV32IMZBS-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a2, s8, s10
+; RV32IMZBS-NEXT: sw a2, 156(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti s8, a1, 29
+; RV32IMZBS-NEXT: addi s8, s8, -1
+; RV32IMZBS-NEXT: bexti s9, a0, 29
+; RV32IMZBS-NEXT: addi s9, s9, -1
+; RV32IMZBS-NEXT: slli s10, t0, 29
+; RV32IMZBS-NEXT: and t4, s8, s10
+; RV32IMZBS-NEXT: slli s11, a7, 29
+; RV32IMZBS-NEXT: and a5, s9, s11
+; RV32IMZBS-NEXT: and a2, s8, s11
+; RV32IMZBS-NEXT: sw a2, 12(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi t1, t1, 1
+; RV32IMZBS-NEXT: andi t2, t2, 1
+; RV32IMZBS-NEXT: seqz t1, t1
+; RV32IMZBS-NEXT: seqz t2, t2
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: slli s11, t0, 30
+; RV32IMZBS-NEXT: and t0, t1, t0
+; RV32IMZBS-NEXT: and t2, t2, a7
+; RV32IMZBS-NEXT: and a2, t1, a7
+; RV32IMZBS-NEXT: sw a2, 8(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, a7, 30
+; RV32IMZBS-NEXT: bexti a1, a1, 30
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: and s11, a1, s11
+; RV32IMZBS-NEXT: and a0, a0, a7
+; RV32IMZBS-NEXT: and a1, a1, a7
+; RV32IMZBS-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 320(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, s5
+; RV32IMZBS-NEXT: sw a1, 320(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 612(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 576(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 464(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 452(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 416(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 408(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 516(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 364(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 464(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a1, 356(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 352(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 348(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 312(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 452(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a1, 344(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 340(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 336(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a1, a1, s3
+; RV32IMZBS-NEXT: sw a1, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 416(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 628(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 584(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 460(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 412(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 476(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 368(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 460(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 340(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 424(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a1, ra, s2
+; RV32IMZBS-NEXT: sw a1, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, s4, a1
+; RV32IMZBS-NEXT: lw a1, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 412(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 468(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 488(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 420(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 468(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 376(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 428(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s9, a6, s7
+; RV32IMZBS-NEXT: lw a1, 316(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, t0, a1
+; RV32IMZBS-NEXT: lw a1, 292(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 268(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, a1, a2
+; RV32IMZBS-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 224(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, a1, s0
+; RV32IMZBS-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 168(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, a1, s1
+; RV32IMZBS-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 88(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, a1, a2
+; RV32IMZBS-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 44(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, a1, a2
+; RV32IMZBS-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 16(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s5, a1, a2
+; RV32IMZBS-NEXT: xor s11, t4, s11
+; RV32IMZBS-NEXT: xor s2, t2, t3
+; RV32IMZBS-NEXT: lw a1, 308(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 284(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a1, a2
+; RV32IMZBS-NEXT: lw a1, 264(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 236(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, a1, a2
+; RV32IMZBS-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 180(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a1, a2
+; RV32IMZBS-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 104(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, a1, a2
+; RV32IMZBS-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 48(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, a1, a2
+; RV32IMZBS-NEXT: xor t6, a4, a3
+; RV32IMZBS-NEXT: xor t0, a5, a0
+; RV32IMZBS-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a1, a0
; RV32IMZBS-NEXT: lw a0, 328(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: or a0, a2, a7
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a2, t0, t3
-; RV32IMZBS-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, s6, a0
-; RV32IMZBS-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, s7, a0
-; RV32IMZBS-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, s8, a0
-; RV32IMZBS-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s10, a0
-; RV32IMZBS-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 116(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 228(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, a0
-; RV32IMZBS-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 300(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 272(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 256(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: lw a2, 208(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 188(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a3
+; RV32IMZBS-NEXT: lw a3, 136(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 116(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a4
+; RV32IMZBS-NEXT: lw a4, 64(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 56(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a5, 28(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a6, 24(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, a6
+; RV32IMZBS-NEXT: lw a6, 12(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 4(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a6, s7
+; RV32IMZBS-NEXT: lw s7, 320(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s7, s8
+; RV32IMZBS-NEXT: sw s7, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 508(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 548(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 444(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 396(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 464(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 452(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 416(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s7, s8
+; RV32IMZBS-NEXT: sw s7, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 668(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 612(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 584(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 448(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 476(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 400(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 460(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 576(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 712(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 424(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 712(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 628(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s7, ra
+; RV32IMZBS-NEXT: sw s7, 560(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 680(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 412(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, s8, s7
+; RV32IMZBS-NEXT: lw s7, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 544(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 488(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 456(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 468(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s8, s7
+; RV32IMZBS-NEXT: sw s7, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s7, 404(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 428(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s8, s7
+; RV32IMZBS-NEXT: lw s7, 720(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, s9, s7
+; RV32IMZBS-NEXT: sw s7, 720(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s6, s10, s6
+; RV32IMZBS-NEXT: sw s6, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s6, 296(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, s0, s6
+; RV32IMZBS-NEXT: lw s0, 228(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s1, s0
+; RV32IMZBS-NEXT: lw s0, 140(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, s0
+; RV32IMZBS-NEXT: lw s0, 76(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, s4, s0
+; RV32IMZBS-NEXT: lw s0, 32(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s5, s0
+; RV32IMZBS-NEXT: sw s0, 668(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s0, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s11, s0
+; RV32IMZBS-NEXT: sw s0, 708(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s0, s2, t1
+; RV32IMZBS-NEXT: lw t1, 304(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, t2, t1
+; RV32IMZBS-NEXT: lw t1, 232(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, t3, t1
+; RV32IMZBS-NEXT: lw t1, 164(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, t1
+; RV32IMZBS-NEXT: lw t1, 84(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, t1
+; RV32IMZBS-NEXT: lw t1, 36(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, t6, t1
+; RV32IMZBS-NEXT: lw t1, 716(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t1
+; RV32IMZBS-NEXT: sw t0, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a7, a7, a0
+; RV32IMZBS-NEXT: lw a0, 324(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, a1, a0
; RV32IMZBS-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, t6, a0
-; RV32IMZBS-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s0, a0
-; RV32IMZBS-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s1, a0
-; RV32IMZBS-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 212(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s4, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, t4, s5
-; RV32IMZBS-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor s9, ra, s9
-; RV32IMZBS-NEXT: lw a0, 216(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, a0, s2
-; RV32IMZBS-NEXT: lw a0, 208(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, a0, t5
-; RV32IMZBS-NEXT: xor ra, a4, s3
-; RV32IMZBS-NEXT: xor s5, a3, t2
-; RV32IMZBS-NEXT: lw a0, 428(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a2, a0
+; RV32IMZBS-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a3, a0
+; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a4, a0
+; RV32IMZBS-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, a1, a0
+; RV32IMZBS-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a6, a0
+; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a4, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a3, a4, 4
+; RV32IMZBS-NEXT: lw a1, 744(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a4, a4, a1
+; RV32IMZBS-NEXT: and a3, a3, a1
+; RV32IMZBS-NEXT: slli a4, a4, 4
+; RV32IMZBS-NEXT: or a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a5, 696(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a4, a5, 4
+; RV32IMZBS-NEXT: and a5, a5, a1
+; RV32IMZBS-NEXT: and a4, a4, a1
+; RV32IMZBS-NEXT: slli a5, a5, 4
+; RV32IMZBS-NEXT: or a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 696(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a0, a2
+; RV32IMZBS-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a2, a0
+; RV32IMZBS-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a2, a0
+; RV32IMZBS-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s11, a2, a0
+; RV32IMZBS-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 500(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, a1, a0
-; RV32IMZBS-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, a1, a0
-; RV32IMZBS-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 464(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, a1, a0
+; RV32IMZBS-NEXT: xor a0, a0, ra
+; RV32IMZBS-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, a2, a0
+; RV32IMZBS-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, a0, a1
-; RV32IMZBS-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s0, 448(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, s0, a0
-; RV32IMZBS-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s1, 444(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s1, a0
-; RV32IMZBS-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 440(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 548(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s9, a2, a0
+; RV32IMZBS-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, s8, a0
+; RV32IMZBS-NEXT: lw a0, 388(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli s7, a0, 4
+; RV32IMZBS-NEXT: and s8, a0, a1
+; RV32IMZBS-NEXT: and s7, s7, a1
+; RV32IMZBS-NEXT: slli s8, s8, 4
+; RV32IMZBS-NEXT: or a0, s7, s8
+; RV32IMZBS-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli s8, a0, 4
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: and s8, s8, a1
+; RV32IMZBS-NEXT: slli a0, a0, 4
+; RV32IMZBS-NEXT: or a0, s8, a0
+; RV32IMZBS-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 680(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s7, a0, s6
+; RV32IMZBS-NEXT: lw a0, 332(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s5, s1, a0
+; RV32IMZBS-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, s3, a0
+; RV32IMZBS-NEXT: lw a0, 144(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s4, a0
+; RV32IMZBS-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor s3, a1, a0
-; RV32IMZBS-NEXT: xor s4, a2, a6
-; RV32IMZBS-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a7, a0
-; RV32IMZBS-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, t0, a0
-; RV32IMZBS-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 416(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, s0, t2
+; RV32IMZBS-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, t3, a0
+; RV32IMZBS-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, t4, a0
+; RV32IMZBS-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t5, a0
+; RV32IMZBS-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t6, a0
+; RV32IMZBS-NEXT: xor t6, a7, t0
+; RV32IMZBS-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, t1, a0
+; RV32IMZBS-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a1, a0
+; RV32IMZBS-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 628(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a7, a1, a0
-; RV32IMZBS-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 624(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t0, a1, a0
-; RV32IMZBS-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, a0
-; RV32IMZBS-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: lw a1, 268(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: lw a2, 264(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a3, 392(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: lw a3, 260(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 388(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: lw a4, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s9, a4, s9
-; RV32IMZBS-NEXT: lw a4, 144(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, s10, a4
-; RV32IMZBS-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, s11, a4
-; RV32IMZBS-NEXT: lw a4, 84(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor ra, ra, a4
-; RV32IMZBS-NEXT: lw a4, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, s5, a4
-; RV32IMZBS-NEXT: lw s5, 428(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s5, s5, s6
-; RV32IMZBS-NEXT: sw s5, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s5, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, s6, s5
-; RV32IMZBS-NEXT: lw s5, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, s7, s5
-; RV32IMZBS-NEXT: lw s5, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, s8, s5
-; RV32IMZBS-NEXT: lw s5, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s5, s5, t2
-; RV32IMZBS-NEXT: lw t2, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t3, t2
-; RV32IMZBS-NEXT: lw t2, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, t4, t2
-; RV32IMZBS-NEXT: lw t2, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, t5, t2
-; RV32IMZBS-NEXT: lw t5, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, t6, t5
-; RV32IMZBS-NEXT: lw t5, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, s0, t5
-; RV32IMZBS-NEXT: lw t5, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s1, t5
-; RV32IMZBS-NEXT: lw t5, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, s2, t5
-; RV32IMZBS-NEXT: sw t5, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw t5, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, s3, t5
-; RV32IMZBS-NEXT: xor t5, s4, a5
-; RV32IMZBS-NEXT: lw a5, 300(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, a6, a5
-; RV32IMZBS-NEXT: lw a5, 296(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s4, a7, a5
-; RV32IMZBS-NEXT: lw a5, 292(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, a5
-; RV32IMZBS-NEXT: xor a6, t1, a0
-; RV32IMZBS-NEXT: lw a5, 288(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a1, a5
-; RV32IMZBS-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a0
-; RV32IMZBS-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a0
+; RV32IMZBS-NEXT: xor t1, a3, a4
+; RV32IMZBS-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a5, a0
+; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a0
+; RV32IMZBS-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, a0
+; RV32IMZBS-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s11, a0, s11
; RV32IMZBS-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, s9, a0
-; RV32IMZBS-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, s10, a0
-; RV32IMZBS-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s9, s11, a0
-; RV32IMZBS-NEXT: lw a0, 156(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a0
+; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a0
+; RV32IMZBS-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a0
+; RV32IMZBS-NEXT: lw a0, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, a0, ra
+; RV32IMZBS-NEXT: lw a0, 672(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, s8, a0
+; RV32IMZBS-NEXT: lw s8, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s9, s9, s8
+; RV32IMZBS-NEXT: lw s8, 484(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, s10, s8
+; RV32IMZBS-NEXT: xor s5, s7, s5
+; RV32IMZBS-NEXT: lw s7, 276(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, s6, s7
+; RV32IMZBS-NEXT: lw s7, 192(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s2, s7
+; RV32IMZBS-NEXT: lw s7, 96(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, s7
+; RV32IMZBS-NEXT: xor s0, s4, s0
+; RV32IMZBS-NEXT: lw s4, 280(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s1, s4
+; RV32IMZBS-NEXT: lw s4, 196(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, s4
+; RV32IMZBS-NEXT: lw s4, 100(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, s4
+; RV32IMZBS-NEXT: xor t2, t6, t2
+; RV32IMZBS-NEXT: lw t6, 288(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, t3, t6
+; RV32IMZBS-NEXT: lw t6, 204(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t6
+; RV32IMZBS-NEXT: lw t6, 108(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t6
+; RV32IMZBS-NEXT: xor a6, t1, a6
+; RV32IMZBS-NEXT: lw t1, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t1
+; RV32IMZBS-NEXT: lw t1, 492(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, t1
+; RV32IMZBS-NEXT: xor a3, s11, a3
+; RV32IMZBS-NEXT: lw t1, 596(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, t1
+; RV32IMZBS-NEXT: lw t1, 496(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, t1
; RV32IMZBS-NEXT: xor a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a0
-; RV32IMZBS-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s10, a1, 4
-; RV32IMZBS-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and s11, a1, a0
-; RV32IMZBS-NEXT: and s10, s10, a0
-; RV32IMZBS-NEXT: slli s11, s11, 4
-; RV32IMZBS-NEXT: or s10, s10, s11
-; RV32IMZBS-NEXT: lw a1, 432(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s11, a1, 4
-; RV32IMZBS-NEXT: and ra, a1, a0
-; RV32IMZBS-NEXT: and s11, s11, a0
-; RV32IMZBS-NEXT: mv a1, a0
-; RV32IMZBS-NEXT: slli ra, ra, 4
-; RV32IMZBS-NEXT: or s11, s11, ra
-; RV32IMZBS-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, a0, s6
-; RV32IMZBS-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, s7, a0
-; RV32IMZBS-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, s8, a0
-; RV32IMZBS-NEXT: xor t3, s5, t3
-; RV32IMZBS-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, t4, a0
-; RV32IMZBS-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, t2, a0
-; RV32IMZBS-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s1, a0
-; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, s2, a0
-; RV32IMZBS-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s5, a0, 4
-; RV32IMZBS-NEXT: and ra, a0, a1
-; RV32IMZBS-NEXT: and s5, s5, a1
-; RV32IMZBS-NEXT: slli ra, ra, 4
-; RV32IMZBS-NEXT: or s5, s5, ra
-; RV32IMZBS-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli ra, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, a1
-; RV32IMZBS-NEXT: and ra, ra, a1
-; RV32IMZBS-NEXT: slli a0, a0, 4
-; RV32IMZBS-NEXT: or a0, ra, a0
-; RV32IMZBS-NEXT: xor t5, t5, s3
-; RV32IMZBS-NEXT: lw a1, 320(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, s4, a1
-; RV32IMZBS-NEXT: lw a1, 316(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, a1
-; RV32IMZBS-NEXT: xor a1, a6, a5
-; RV32IMZBS-NEXT: lw a5, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a5
-; RV32IMZBS-NEXT: lw a5, 304(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a5
-; RV32IMZBS-NEXT: lw a5, 676(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, s9, a5
-; RV32IMZBS-NEXT: lw a6, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a6
-; RV32IMZBS-NEXT: xor s4, s6, s7
-; RV32IMZBS-NEXT: lw s6, 636(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, s8, s6
-; RV32IMZBS-NEXT: xor t3, t3, t4
-; RV32IMZBS-NEXT: lw t4, 624(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, t2, t4
-; RV32IMZBS-NEXT: mv s7, t6
-; RV32IMZBS-NEXT: xor t4, t6, s0
+; RV32IMZBS-NEXT: lw t1, 604(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, s9, t1
+; RV32IMZBS-NEXT: lw t6, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, s10, t6
+; RV32IMZBS-NEXT: xor s4, s5, s6
+; RV32IMZBS-NEXT: lw s5, 212(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s2, s5
+; RV32IMZBS-NEXT: lw s5, 120(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, s5
+; RV32IMZBS-NEXT: xor s0, s0, s1
+; RV32IMZBS-NEXT: lw s1, 216(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t4, t4, s1
-; RV32IMZBS-NEXT: lw t6, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, s2, t6
-; RV32IMZBS-NEXT: xor t5, t5, s3
-; RV32IMZBS-NEXT: lw a6, 324(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, a6
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 312(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: xor a3, a7, t1
-; RV32IMZBS-NEXT: xor a3, a3, a5
-; RV32IMZBS-NEXT: lw a5, 668(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 124(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, s1
+; RV32IMZBS-NEXT: xor t2, t2, t3
+; RV32IMZBS-NEXT: lw t3, 220(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t3
+; RV32IMZBS-NEXT: lw t3, 132(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t3
+; RV32IMZBS-NEXT: lw s1, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t3, s1, 2
+; RV32IMZBS-NEXT: lw s6, 728(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and s1, s1, s6
+; RV32IMZBS-NEXT: and t3, t3, s6
+; RV32IMZBS-NEXT: slli s1, s1, 2
+; RV32IMZBS-NEXT: or t3, t3, s1
+; RV32IMZBS-NEXT: lw s5, 696(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli s1, s5, 2
+; RV32IMZBS-NEXT: and s5, s5, s6
+; RV32IMZBS-NEXT: and s1, s1, s6
+; RV32IMZBS-NEXT: slli s5, s5, 2
+; RV32IMZBS-NEXT: or s1, s1, s5
+; RV32IMZBS-NEXT: xor a4, a6, a4
+; RV32IMZBS-NEXT: lw a6, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, a6
+; RV32IMZBS-NEXT: xor a1, a3, a1
+; RV32IMZBS-NEXT: lw a3, 528(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a3
+; RV32IMZBS-NEXT: xor a0, a0, t1
+; RV32IMZBS-NEXT: lw a3, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, t6, a3
+; RV32IMZBS-NEXT: lw t1, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a6, t1, 2
+; RV32IMZBS-NEXT: and t1, t1, s6
+; RV32IMZBS-NEXT: and a6, a6, s6
+; RV32IMZBS-NEXT: slli t1, t1, 2
+; RV32IMZBS-NEXT: or a6, a6, t1
+; RV32IMZBS-NEXT: lw t6, 684(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t1, t6, 2
+; RV32IMZBS-NEXT: and t6, t6, s6
+; RV32IMZBS-NEXT: and t1, t1, s6
+; RV32IMZBS-NEXT: mv s5, s6
+; RV32IMZBS-NEXT: slli t6, t6, 2
+; RV32IMZBS-NEXT: or t1, t1, t6
+; RV32IMZBS-NEXT: xor t6, s4, s2
+; RV32IMZBS-NEXT: lw s2, 152(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s3, s2
+; RV32IMZBS-NEXT: xor t4, s0, t4
+; RV32IMZBS-NEXT: lw s0, 148(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, s0
+; RV32IMZBS-NEXT: xor a7, t2, a7
+; RV32IMZBS-NEXT: lw t2, 156(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t2
; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: xor a5, s4, s6
-; RV32IMZBS-NEXT: xor t1, t3, t2
-; RV32IMZBS-NEXT: lw a6, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, t4, a6
-; RV32IMZBS-NEXT: lw t3, 608(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t6, t3
-; RV32IMZBS-NEXT: xor t0, t5, t0
; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: lw a3, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: srli a4, s10, 2
-; RV32IMZBS-NEXT: lw t6, 688(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a6, s10, t6
-; RV32IMZBS-NEXT: and a4, a4, t6
-; RV32IMZBS-NEXT: slli a6, a6, 2
-; RV32IMZBS-NEXT: or a4, a4, a6
-; RV32IMZBS-NEXT: srli a6, s11, 2
-; RV32IMZBS-NEXT: and t4, s11, t6
-; RV32IMZBS-NEXT: and a6, a6, t6
-; RV32IMZBS-NEXT: slli t4, t4, 2
-; RV32IMZBS-NEXT: or a6, a6, t4
-; RV32IMZBS-NEXT: lw t4, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, t4
-; RV32IMZBS-NEXT: lw t4, 620(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, t4
-; RV32IMZBS-NEXT: srli t4, s5, 2
-; RV32IMZBS-NEXT: and t5, s5, t6
-; RV32IMZBS-NEXT: and t4, t4, t6
-; RV32IMZBS-NEXT: slli t5, t5, 2
-; RV32IMZBS-NEXT: or t4, t4, t5
-; RV32IMZBS-NEXT: srli t5, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, t6
-; RV32IMZBS-NEXT: and t5, t5, t6
-; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: or a0, t5, a0
-; RV32IMZBS-NEXT: lw t5, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, t5
-; RV32IMZBS-NEXT: lw t5, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, t5
-; RV32IMZBS-NEXT: xor a5, t1, a5
-; RV32IMZBS-NEXT: sw a5, 676(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a1, a1, t0
-; RV32IMZBS-NEXT: sw a1, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli a1, s7, 24
-; RV32IMZBS-NEXT: lw t1, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a5, t2, t1
-; RV32IMZBS-NEXT: slli a5, a5, 8
-; RV32IMZBS-NEXT: or a5, a1, a5
-; RV32IMZBS-NEXT: xor a1, t2, t3
-; RV32IMZBS-NEXT: srli t0, t2, 8
-; RV32IMZBS-NEXT: and t0, t0, t1
-; RV32IMZBS-NEXT: srli a1, a1, 24
-; RV32IMZBS-NEXT: or s0, t0, a1
-; RV32IMZBS-NEXT: slli a7, a7, 24
-; RV32IMZBS-NEXT: and a1, a2, t1
-; RV32IMZBS-NEXT: slli a1, a1, 8
-; RV32IMZBS-NEXT: or a7, a7, a1
-; RV32IMZBS-NEXT: xor a3, a2, a3
-; RV32IMZBS-NEXT: srli a2, a2, 8
-; RV32IMZBS-NEXT: and a1, a2, t1
-; RV32IMZBS-NEXT: srli a3, a3, 24
-; RV32IMZBS-NEXT: or a3, a1, a3
-; RV32IMZBS-NEXT: srli a1, a4, 1
-; RV32IMZBS-NEXT: lw t0, 692(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a2, a4, t0
-; RV32IMZBS-NEXT: and a1, a1, t0
-; RV32IMZBS-NEXT: slli a2, a2, 1
-; RV32IMZBS-NEXT: or a1, a1, a2
-; RV32IMZBS-NEXT: srli a2, a6, 1
-; RV32IMZBS-NEXT: and a4, a6, t0
-; RV32IMZBS-NEXT: and a2, a2, t0
-; RV32IMZBS-NEXT: slli a4, a4, 1
-; RV32IMZBS-NEXT: or a2, a2, a4
-; RV32IMZBS-NEXT: or a5, a5, s0
-; RV32IMZBS-NEXT: sw a5, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli a4, t4, 1
-; RV32IMZBS-NEXT: and a5, t4, t0
-; RV32IMZBS-NEXT: and a4, a4, t0
+; RV32IMZBS-NEXT: srli a2, t3, 1
+; RV32IMZBS-NEXT: lw s11, 724(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a5, t3, s11
+; RV32IMZBS-NEXT: xor a0, a0, a3
+; RV32IMZBS-NEXT: srli a3, s1, 1
+; RV32IMZBS-NEXT: and t2, s1, s11
+; RV32IMZBS-NEXT: xor t3, t6, s2
+; RV32IMZBS-NEXT: srli t6, a6, 1
+; RV32IMZBS-NEXT: and a6, a6, s11
+; RV32IMZBS-NEXT: xor t4, t4, t5
+; RV32IMZBS-NEXT: srli t5, t1, 1
+; RV32IMZBS-NEXT: and t1, t1, s11
+; RV32IMZBS-NEXT: xor a7, a7, t0
+; RV32IMZBS-NEXT: and a2, a2, s11
; RV32IMZBS-NEXT: slli a5, a5, 1
-; RV32IMZBS-NEXT: or t5, a4, a5
-; RV32IMZBS-NEXT: srli a4, a0, 1
-; RV32IMZBS-NEXT: and a0, a0, t0
-; RV32IMZBS-NEXT: and a4, a4, t0
-; RV32IMZBS-NEXT: slli a0, a0, 1
-; RV32IMZBS-NEXT: or ra, a4, a0
-; RV32IMZBS-NEXT: or a0, a7, a3
-; RV32IMZBS-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a2, 2
-; RV32IMZBS-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a2, 1
-; RV32IMZBS-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a4, a2, 4
-; RV32IMZBS-NEXT: andi a5, a2, 8
-; RV32IMZBS-NEXT: andi a6, a2, 16
-; RV32IMZBS-NEXT: andi a7, a2, 32
-; RV32IMZBS-NEXT: andi t0, a2, 64
-; RV32IMZBS-NEXT: andi t1, a2, 128
-; RV32IMZBS-NEXT: andi t2, a2, 256
-; RV32IMZBS-NEXT: andi t3, a2, 512
-; RV32IMZBS-NEXT: andi t4, a2, 1024
-; RV32IMZBS-NEXT: lw a3, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and t6, a2, a3
-; RV32IMZBS-NEXT: lui a0, 1
-; RV32IMZBS-NEXT: and s2, a2, a0
-; RV32IMZBS-NEXT: lui a0, 2
-; RV32IMZBS-NEXT: and s3, a2, a0
-; RV32IMZBS-NEXT: lui a0, 4
-; RV32IMZBS-NEXT: and s9, a2, a0
-; RV32IMZBS-NEXT: lui a0, 8
-; RV32IMZBS-NEXT: and s10, a2, a0
-; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: and s11, a2, a0
-; RV32IMZBS-NEXT: lui s0, 32
-; RV32IMZBS-NEXT: and s0, a2, s0
-; RV32IMZBS-NEXT: lui s1, 64
-; RV32IMZBS-NEXT: and s1, a2, s1
-; RV32IMZBS-NEXT: lui a0, 128
-; RV32IMZBS-NEXT: and s4, a2, a0
-; RV32IMZBS-NEXT: lui s5, 256
-; RV32IMZBS-NEXT: and s5, a2, s5
-; RV32IMZBS-NEXT: lui s6, 512
-; RV32IMZBS-NEXT: and s6, a2, s6
-; RV32IMZBS-NEXT: lui a0, 1024
-; RV32IMZBS-NEXT: and s7, a2, a0
-; RV32IMZBS-NEXT: lui s8, 2048
-; RV32IMZBS-NEXT: and s8, a2, s8
-; RV32IMZBS-NEXT: lui a0, 4096
-; RV32IMZBS-NEXT: and a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 8192
-; RV32IMZBS-NEXT: and a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 16384
-; RV32IMZBS-NEXT: and a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 32768
-; RV32IMZBS-NEXT: and a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 65536
-; RV32IMZBS-NEXT: and a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 131072
-; RV32IMZBS-NEXT: and a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 262144
-; RV32IMZBS-NEXT: and a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and a2, a2, a0
-; RV32IMZBS-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
+; RV32IMZBS-NEXT: and a3, a3, s11
+; RV32IMZBS-NEXT: slli t2, t2, 1
+; RV32IMZBS-NEXT: lw t0, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t0
+; RV32IMZBS-NEXT: lw t0, 712(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, a1, t0
+; RV32IMZBS-NEXT: lw s0, 720(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, a0, s0
+; RV32IMZBS-NEXT: and t6, t6, s11
+; RV32IMZBS-NEXT: slli a6, a6, 1
+; RV32IMZBS-NEXT: and t5, t5, s11
+; RV32IMZBS-NEXT: slli t1, t1, 1
+; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, t3, a0
+; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, t4, a0
+; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, a0
+; RV32IMZBS-NEXT: or a0, a2, a5
+; RV32IMZBS-NEXT: or a1, a3, t2
+; RV32IMZBS-NEXT: srli a2, t2, 31
+; RV32IMZBS-NEXT: xor a3, t0, a4
+; RV32IMZBS-NEXT: sw a3, 720(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: srli a3, s0, 8
+; RV32IMZBS-NEXT: srli a4, s0, 24
+; RV32IMZBS-NEXT: slli a5, s0, 24
+; RV32IMZBS-NEXT: lw s4, 740(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and t2, s0, s4
+; RV32IMZBS-NEXT: or t4, t6, a6
+; RV32IMZBS-NEXT: or t6, t5, t1
+; RV32IMZBS-NEXT: srli a6, t1, 31
+; RV32IMZBS-NEXT: xor t0, t3, s1
+; RV32IMZBS-NEXT: sw t0, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: srli t1, a7, 8
+; RV32IMZBS-NEXT: srli t3, a7, 24
+; RV32IMZBS-NEXT: slli t5, a7, 24
+; RV32IMZBS-NEXT: and a7, a7, s4
+; RV32IMZBS-NEXT: slli t0, a0, 1
+; RV32IMZBS-NEXT: andi s0, a1, 2
+; RV32IMZBS-NEXT: slli s1, a0, 2
+; RV32IMZBS-NEXT: andi s2, a1, 4
+; RV32IMZBS-NEXT: slli s3, a0, 3
+; RV32IMZBS-NEXT: and a3, a3, s4
+; RV32IMZBS-NEXT: or a3, a3, a4
+; RV32IMZBS-NEXT: andi a4, a1, 8
+; RV32IMZBS-NEXT: slli t2, t2, 8
+; RV32IMZBS-NEXT: or a5, a5, t2
+; RV32IMZBS-NEXT: slli t2, a0, 4
+; RV32IMZBS-NEXT: and t1, t1, s4
+; RV32IMZBS-NEXT: or t1, t1, t3
+; RV32IMZBS-NEXT: andi t3, a1, 16
+; RV32IMZBS-NEXT: slli a7, a7, 8
+; RV32IMZBS-NEXT: or a7, t5, a7
+; RV32IMZBS-NEXT: slli t5, a0, 31
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a2, a2, t5
+; RV32IMZBS-NEXT: sw a2, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a0, 5
+; RV32IMZBS-NEXT: or a3, a5, a3
+; RV32IMZBS-NEXT: sw a3, 700(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, t4, 31
+; RV32IMZBS-NEXT: seqz a5, a6
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a3, a5, a3
+; RV32IMZBS-NEXT: sw a3, 712(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a3, a1, 32
+; RV32IMZBS-NEXT: or a5, a7, t1
+; RV32IMZBS-NEXT: sw a5, 708(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, a0, 6
+; RV32IMZBS-NEXT: seqz a6, s0
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and a6, a6, t0
+; RV32IMZBS-NEXT: sw a6, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a6, a1, 64
+; RV32IMZBS-NEXT: seqz a7, s2
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: and a7, a7, s1
+; RV32IMZBS-NEXT: sw a7, 688(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, a0, 7
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a4, a4, s3
+; RV32IMZBS-NEXT: sw a4, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a1, 128
+; RV32IMZBS-NEXT: seqz t1, t3
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: and t0, t1, t2
+; RV32IMZBS-NEXT: sw t0, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t1, a0, 8
+; RV32IMZBS-NEXT: seqz a3, a3
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a2, a3, a2
+; RV32IMZBS-NEXT: sw a2, 664(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a2, a1, 256
+; RV32IMZBS-NEXT: seqz a3, a6
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a3, a3, a5
+; RV32IMZBS-NEXT: sw a3, 684(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, a0, 9
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a5, a4, a7
+; RV32IMZBS-NEXT: andi a4, a1, 512
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and t1, a2, t1
+; RV32IMZBS-NEXT: slli a2, a0, 10
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a3, a4, a3
+; RV32IMZBS-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a3, a1, 1024
+; RV32IMZBS-NEXT: seqz a3, a3
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a2, a3, a2
+; RV32IMZBS-NEXT: sw a2, 696(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a2, a1
+; RV32IMZBS-NEXT: bexti a3, a2, 11
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 11
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 12
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 12
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 13
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 13
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 14
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 14
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 668(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 15
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 16
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 16
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 612(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 17
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 17
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 18
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 18
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 19
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 19
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 20
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 20
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 21
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 21
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 22
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 22
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 23
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 23
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 596(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 24
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 24
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 604(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 25
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 25
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 26
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 26
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 27
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 27
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 28
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 28
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 29
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 29
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 592(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a1, a1, 1
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a1, a1, a0
+; RV32IMZBS-NEXT: sw a1, 584(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a0, a0, 30
+; RV32IMZBS-NEXT: bexti a1, a2, 30
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t6, 2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t4, 1
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
+; RV32IMZBS-NEXT: andi a0, t6, 4
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t4, 2
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a4
-; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a5
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a6
+; RV32IMZBS-NEXT: andi a0, t6, 8
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t4, 3
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a7
+; RV32IMZBS-NEXT: andi a0, t6, 16
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t4, 4
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t0
-; RV32IMZBS-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t1
-; RV32IMZBS-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t2
+; RV32IMZBS-NEXT: andi a0, t6, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t4, 5
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t3
-; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t4
-; RV32IMZBS-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t6
-; RV32IMZBS-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s2
-; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s3
-; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s9
-; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s10
-; RV32IMZBS-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s11
-; RV32IMZBS-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s0
-; RV32IMZBS-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s1
-; RV32IMZBS-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s4
+; RV32IMZBS-NEXT: andi a0, t6, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t4, 6
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t6, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t4, 7
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s5
+; RV32IMZBS-NEXT: andi a0, t6, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli s1, t4, 8
+; RV32IMZBS-NEXT: and a0, a0, s1
; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s6
-; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s7
-; RV32IMZBS-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s8
-; RV32IMZBS-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, a2
-; RV32IMZBS-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t1, ra, a3
-; RV32IMZBS-NEXT: lui a0, 1
-; RV32IMZBS-NEXT: and t4, ra, a0
-; RV32IMZBS-NEXT: lui a0, 2
-; RV32IMZBS-NEXT: and a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 4
-; RV32IMZBS-NEXT: and a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 8
-; RV32IMZBS-NEXT: and a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: and a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 32
-; RV32IMZBS-NEXT: and a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 64
-; RV32IMZBS-NEXT: and t0, ra, a0
-; RV32IMZBS-NEXT: lui a0, 128
-; RV32IMZBS-NEXT: and s4, ra, a0
-; RV32IMZBS-NEXT: lui a0, 256
-; RV32IMZBS-NEXT: and s5, ra, a0
-; RV32IMZBS-NEXT: lui a0, 512
-; RV32IMZBS-NEXT: and s6, ra, a0
-; RV32IMZBS-NEXT: lui a0, 1024
-; RV32IMZBS-NEXT: and s7, ra, a0
-; RV32IMZBS-NEXT: lui a0, 2048
-; RV32IMZBS-NEXT: and s8, ra, a0
-; RV32IMZBS-NEXT: lui a0, 4096
-; RV32IMZBS-NEXT: and s9, ra, a0
-; RV32IMZBS-NEXT: lui a0, 8192
-; RV32IMZBS-NEXT: and s10, ra, a0
-; RV32IMZBS-NEXT: lui a0, 16384
-; RV32IMZBS-NEXT: and s11, ra, a0
-; RV32IMZBS-NEXT: lui a0, 32768
-; RV32IMZBS-NEXT: and a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 65536
-; RV32IMZBS-NEXT: and s3, ra, a0
-; RV32IMZBS-NEXT: lui a0, 131072
-; RV32IMZBS-NEXT: and a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 262144
-; RV32IMZBS-NEXT: and a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t3, ra, 2
-; RV32IMZBS-NEXT: andi t6, ra, 1
-; RV32IMZBS-NEXT: andi a0, ra, 4
-; RV32IMZBS-NEXT: andi a1, ra, 8
-; RV32IMZBS-NEXT: andi a2, ra, 16
-; RV32IMZBS-NEXT: andi a3, ra, 32
-; RV32IMZBS-NEXT: andi a4, ra, 64
-; RV32IMZBS-NEXT: andi a5, ra, 128
-; RV32IMZBS-NEXT: andi a6, ra, 256
-; RV32IMZBS-NEXT: andi a7, ra, 512
-; RV32IMZBS-NEXT: andi ra, ra, 1024
-; RV32IMZBS-NEXT: mul t2, t5, t3
-; RV32IMZBS-NEXT: sw t2, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t2, t5, t6
-; RV32IMZBS-NEXT: sw t2, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t5, a0
-; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s0, t5, a1
-; RV32IMZBS-NEXT: mul s2, t5, a2
-; RV32IMZBS-NEXT: mul t6, t5, a3
-; RV32IMZBS-NEXT: mul a0, t5, a4
-; RV32IMZBS-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t5, a5
-; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s1, t5, a6
-; RV32IMZBS-NEXT: mul t3, t5, a7
-; RV32IMZBS-NEXT: mul a0, t5, ra
-; RV32IMZBS-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t5, t1
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t5, t4
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t4, t5, a0
-; RV32IMZBS-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t1, t5, a0
-; RV32IMZBS-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul ra, t5, a0
-; RV32IMZBS-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t5, a0
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t5, a0
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t5, t0
-; RV32IMZBS-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s4, t5, s4
-; RV32IMZBS-NEXT: mul t2, t5, s5
-; RV32IMZBS-NEXT: mul s6, t5, s6
-; RV32IMZBS-NEXT: mul a0, t5, s7
-; RV32IMZBS-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s8, t5, s8
-; RV32IMZBS-NEXT: mul s9, t5, s9
-; RV32IMZBS-NEXT: mul s10, t5, s10
-; RV32IMZBS-NEXT: mul s11, t5, s11
-; RV32IMZBS-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s7, t5, a0
-; RV32IMZBS-NEXT: mul s3, t5, s3
-; RV32IMZBS-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t5, a0
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t5, a0
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t5, a0
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, a1, a0
-; RV32IMZBS-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a0, a1
-; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a0, a1
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a0, a2
-; RV32IMZBS-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a3, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a0, a3
-; RV32IMZBS-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a0, a4
-; RV32IMZBS-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a5, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a0, a5
+; RV32IMZBS-NEXT: andi a0, t6, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t4, 9
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t6, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t4, 10
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a0, t6
+; RV32IMZBS-NEXT: bexti a1, a0, 11
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t4, 11
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 12
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t4, 12
+; RV32IMZBS-NEXT: and s9, a1, a2
+; RV32IMZBS-NEXT: bexti a1, a0, 13
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli s0, t4, 13
+; RV32IMZBS-NEXT: and a1, a1, s0
+; RV32IMZBS-NEXT: sw a1, 528(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 14
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t4, 14
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 544(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 15
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t4, 15
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 16
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, t4, 16
+; RV32IMZBS-NEXT: and s4, a1, a3
+; RV32IMZBS-NEXT: bexti a1, a0, 17
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t4, 17
+; RV32IMZBS-NEXT: and s3, a1, a2
+; RV32IMZBS-NEXT: bexti a1, a0, 18
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, t4, 18
+; RV32IMZBS-NEXT: and s8, a1, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 19
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, t4, 19
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 520(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 20
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, t4, 20
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 21
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, t4, 21
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 22
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, t4, 22
+; RV32IMZBS-NEXT: and s1, a1, a4
+; RV32IMZBS-NEXT: bexti a2, a0, 23
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a4, t4, 23
+; RV32IMZBS-NEXT: and s0, a2, a4
+; RV32IMZBS-NEXT: bexti a3, a0, 24
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t4, 24
+; RV32IMZBS-NEXT: and s2, a3, a4
+; RV32IMZBS-NEXT: bexti a4, a0, 25
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a7, t4, 25
+; RV32IMZBS-NEXT: and s6, a4, a7
+; RV32IMZBS-NEXT: bexti a7, a0, 26
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: slli t2, t4, 26
+; RV32IMZBS-NEXT: and s7, a7, t2
+; RV32IMZBS-NEXT: bexti t2, a0, 27
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: slli t5, t4, 27
+; RV32IMZBS-NEXT: and s10, t2, t5
+; RV32IMZBS-NEXT: bexti t5, a0, 28
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: slli t0, t4, 28
+; RV32IMZBS-NEXT: and t5, t5, t0
+; RV32IMZBS-NEXT: bexti t0, a0, 29
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: slli t3, t4, 29
+; RV32IMZBS-NEXT: and t2, t0, t3
+; RV32IMZBS-NEXT: andi t3, t6, 1
+; RV32IMZBS-NEXT: seqz t3, t3
+; RV32IMZBS-NEXT: addi t3, t3, -1
+; RV32IMZBS-NEXT: and t3, t3, t4
+; RV32IMZBS-NEXT: slli t4, t4, 30
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: and a6, a0, t4
+; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, a1, a0
+; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, a0, a1
+; RV32IMZBS-NEXT: lw a0, 672(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli a0, a1, 4
-; RV32IMZBS-NEXT: lw s5, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a1, a1, s5
-; RV32IMZBS-NEXT: and a0, a0, s5
-; RV32IMZBS-NEXT: slli a1, a1, 4
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw t5, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, t5, a1
-; RV32IMZBS-NEXT: lw t5, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, t5, s0
-; RV32IMZBS-NEXT: xor t6, s2, t6
-; RV32IMZBS-NEXT: xor t3, s1, t3
-; RV32IMZBS-NEXT: xor t1, t4, t1
-; RV32IMZBS-NEXT: xor t2, s4, t2
-; RV32IMZBS-NEXT: xor t4, s9, s10
-; RV32IMZBS-NEXT: lw s2, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s1, s2, 4
-; RV32IMZBS-NEXT: and s4, s2, s5
-; RV32IMZBS-NEXT: and s1, s1, s5
-; RV32IMZBS-NEXT: slli s4, s4, 4
-; RV32IMZBS-NEXT: or s1, s1, s4
-; RV32IMZBS-NEXT: xor a6, t0, a6
-; RV32IMZBS-NEXT: lw t0, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: xor a1, a5, t1
+; RV32IMZBS-NEXT: lw a2, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a3
+; RV32IMZBS-NEXT: lw a3, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a4
+; RV32IMZBS-NEXT: lw a4, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 596(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a5, 592(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a7, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, a7
+; RV32IMZBS-NEXT: lw a7, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, t3, a7
+; RV32IMZBS-NEXT: lw a7, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t0, 564(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a7, a7, t0
-; RV32IMZBS-NEXT: lw t0, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t0
-; RV32IMZBS-NEXT: lw t0, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, t0
-; RV32IMZBS-NEXT: lw t0, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, t0
-; RV32IMZBS-NEXT: lw t0, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, t0
-; RV32IMZBS-NEXT: xor a1, a1, s0
-; RV32IMZBS-NEXT: lw t0, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t6, t0
-; RV32IMZBS-NEXT: lw t6, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t3, t6
+; RV32IMZBS-NEXT: lw t0, 560(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t1, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t1
+; RV32IMZBS-NEXT: lw t1, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw ra, 532(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t1, t1, ra
-; RV32IMZBS-NEXT: xor t2, t2, s6
-; RV32IMZBS-NEXT: xor t4, t4, s11
-; RV32IMZBS-NEXT: xor a6, a6, a7
-; RV32IMZBS-NEXT: lw a7, 624(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a7
-; RV32IMZBS-NEXT: lw a7, 616(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a7
-; RV32IMZBS-NEXT: lw a7, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a7
-; RV32IMZBS-NEXT: lw a7, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a7
-; RV32IMZBS-NEXT: xor a1, a1, t0
-; RV32IMZBS-NEXT: lw a7, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw ra, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s9, ra, s9
+; RV32IMZBS-NEXT: xor s3, s4, s3
+; RV32IMZBS-NEXT: xor s0, s1, s0
+; RV32IMZBS-NEXT: xor t2, t2, a6
+; RV32IMZBS-NEXT: xor t4, t4, t6
+; RV32IMZBS-NEXT: lw a6, 684(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a6
+; RV32IMZBS-NEXT: lw a6, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a6
+; RV32IMZBS-NEXT: lw a6, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, a2, a6
+; RV32IMZBS-NEXT: lw a2, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a2
+; RV32IMZBS-NEXT: lw a2, 604(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a2
+; RV32IMZBS-NEXT: lw a2, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a5, a2
+; RV32IMZBS-NEXT: lw a5, 700(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a2, a5, 4
+; RV32IMZBS-NEXT: lw s4, 744(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a5, a5, s4
+; RV32IMZBS-NEXT: and a2, a2, s4
+; RV32IMZBS-NEXT: slli a5, a5, 4
+; RV32IMZBS-NEXT: or a5, a2, a5
; RV32IMZBS-NEXT: xor a7, t3, a7
-; RV32IMZBS-NEXT: lw t0, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, a2
+; RV32IMZBS-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t1, a2
+; RV32IMZBS-NEXT: lw a2, 528(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, s9, a2
+; RV32IMZBS-NEXT: xor s1, s3, s8
+; RV32IMZBS-NEXT: xor s0, s0, s2
+; RV32IMZBS-NEXT: lw a2, 712(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, t2, a2
+; RV32IMZBS-NEXT: lw s2, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t2, s2, 4
+; RV32IMZBS-NEXT: and s2, s2, s4
+; RV32IMZBS-NEXT: and t2, t2, s4
+; RV32IMZBS-NEXT: slli s2, s2, 4
+; RV32IMZBS-NEXT: or t2, t2, s2
+; RV32IMZBS-NEXT: xor a0, t4, a0
+; RV32IMZBS-NEXT: lw t4, 696(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, t4
+; RV32IMZBS-NEXT: lw t4, 668(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t6, t4
+; RV32IMZBS-NEXT: lw t6, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, t6
+; RV32IMZBS-NEXT: lw t6, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t6
+; RV32IMZBS-NEXT: xor a7, a7, t0
+; RV32IMZBS-NEXT: lw t0, 568(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t0, t1, t0
-; RV32IMZBS-NEXT: lw t1, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t2, t1
-; RV32IMZBS-NEXT: xor t2, t4, s7
-; RV32IMZBS-NEXT: lw t3, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, t3
-; RV32IMZBS-NEXT: lw t3, 632(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t3
-; RV32IMZBS-NEXT: lw t3, 628(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, t3
-; RV32IMZBS-NEXT: lw t3, 620(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, t3
-; RV32IMZBS-NEXT: lw t3, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, t3
-; RV32IMZBS-NEXT: srli t3, a0, 2
-; RV32IMZBS-NEXT: lw t6, 688(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, a0, t6
-; RV32IMZBS-NEXT: and t3, t3, t6
-; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: or a0, t3, a0
-; RV32IMZBS-NEXT: lw t3, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, t3
+; RV32IMZBS-NEXT: lw t1, 544(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t3, t1
; RV32IMZBS-NEXT: lw t3, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, t3
-; RV32IMZBS-NEXT: lw t3, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, t3
-; RV32IMZBS-NEXT: xor t1, t1, s8
-; RV32IMZBS-NEXT: xor t2, t2, s3
-; RV32IMZBS-NEXT: srli t3, s1, 2
-; RV32IMZBS-NEXT: and t4, s1, t6
-; RV32IMZBS-NEXT: and t3, t3, t6
-; RV32IMZBS-NEXT: slli t4, t4, 2
-; RV32IMZBS-NEXT: or t3, t3, t4
-; RV32IMZBS-NEXT: lw t4, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, s1, t3
+; RV32IMZBS-NEXT: xor t6, s0, s6
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 680(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, t4, a1
+; RV32IMZBS-NEXT: lw t4, 652(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a3, a3, t4
-; RV32IMZBS-NEXT: lw t4, 636(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, t4
-; RV32IMZBS-NEXT: lw t4, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, t4
-; RV32IMZBS-NEXT: lw t4, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, t2, t4
-; RV32IMZBS-NEXT: xor a2, a6, a2
-; RV32IMZBS-NEXT: xor a2, a2, a3
-; RV32IMZBS-NEXT: lw a3, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a5, a3
-; RV32IMZBS-NEXT: xor a5, a1, a7
-; RV32IMZBS-NEXT: xor a5, a5, t0
-; RV32IMZBS-NEXT: lw a7, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, t2, a7
-; RV32IMZBS-NEXT: xor t0, a2, a4
-; RV32IMZBS-NEXT: slli a6, a6, 24
-; RV32IMZBS-NEXT: lw a2, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, a3, a2
-; RV32IMZBS-NEXT: srli a3, a0, 1
-; RV32IMZBS-NEXT: xor t1, a5, t1
-; RV32IMZBS-NEXT: srli a2, t3, 1
-; RV32IMZBS-NEXT: lw t5, 692(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, a0, t5
-; RV32IMZBS-NEXT: and a5, t3, t5
-; RV32IMZBS-NEXT: lw a4, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, a4
-; RV32IMZBS-NEXT: and a4, a3, t5
-; RV32IMZBS-NEXT: slli a0, a0, 1
-; RV32IMZBS-NEXT: or a4, a4, a0
-; RV32IMZBS-NEXT: and a0, a2, t5
-; RV32IMZBS-NEXT: slli a5, a5, 1
-; RV32IMZBS-NEXT: or a5, a0, a5
-; RV32IMZBS-NEXT: lw t3, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, t0, t3
-; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a0, a6, a0
-; RV32IMZBS-NEXT: slli a1, a1, 24
-; RV32IMZBS-NEXT: xor a6, t0, t2
-; RV32IMZBS-NEXT: srli t0, t0, 8
-; RV32IMZBS-NEXT: and t0, t0, t3
-; RV32IMZBS-NEXT: srli a6, a6, 24
-; RV32IMZBS-NEXT: or a6, t0, a6
-; RV32IMZBS-NEXT: and t0, t1, t3
-; RV32IMZBS-NEXT: slli t0, t0, 8
-; RV32IMZBS-NEXT: or a1, a1, t0
+; RV32IMZBS-NEXT: lw t4, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t4
+; RV32IMZBS-NEXT: xor a7, a7, t0
+; RV32IMZBS-NEXT: lw t0, 548(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t1, t0
+; RV32IMZBS-NEXT: lw t1, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t3, t1
+; RV32IMZBS-NEXT: xor t3, t6, s7
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a3, a1
+; RV32IMZBS-NEXT: lw a3, 628(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a4, a3
+; RV32IMZBS-NEXT: srli a4, a5, 2
+; RV32IMZBS-NEXT: and a5, a5, s5
+; RV32IMZBS-NEXT: and a4, a4, s5
+; RV32IMZBS-NEXT: slli a5, a5, 2
+; RV32IMZBS-NEXT: or a4, a4, a5
+; RV32IMZBS-NEXT: xor a5, a7, t0
+; RV32IMZBS-NEXT: lw a7, 536(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a7, t1, a7
-; RV32IMZBS-NEXT: srli t0, t1, 8
-; RV32IMZBS-NEXT: and t0, t0, t3
-; RV32IMZBS-NEXT: srli a7, a7, 24
-; RV32IMZBS-NEXT: or a7, t0, a7
-; RV32IMZBS-NEXT: or a6, a0, a6
-; RV32IMZBS-NEXT: lui a0, 349525
-; RV32IMZBS-NEXT: addi a0, a0, 1364
-; RV32IMZBS-NEXT: or a1, a1, a7
+; RV32IMZBS-NEXT: xor t0, t3, s10
+; RV32IMZBS-NEXT: srli t1, t2, 2
+; RV32IMZBS-NEXT: and t2, t2, s5
+; RV32IMZBS-NEXT: and t1, t1, s5
+; RV32IMZBS-NEXT: slli t2, t2, 2
+; RV32IMZBS-NEXT: or t1, t1, t2
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a1
+; RV32IMZBS-NEXT: xor a5, a5, a7
+; RV32IMZBS-NEXT: xor a7, t0, t5
+; RV32IMZBS-NEXT: srli a1, a4, 1
+; RV32IMZBS-NEXT: xor a3, a0, a3
+; RV32IMZBS-NEXT: srli a0, t1, 1
+; RV32IMZBS-NEXT: and a4, a4, s11
+; RV32IMZBS-NEXT: and t0, t1, s11
+; RV32IMZBS-NEXT: xor a5, a5, a7
+; RV32IMZBS-NEXT: and a7, a1, s11
+; RV32IMZBS-NEXT: slli a4, a4, 1
+; RV32IMZBS-NEXT: xor a6, a3, a6
+; RV32IMZBS-NEXT: and a3, a0, s11
+; RV32IMZBS-NEXT: slli t0, t0, 1
+; RV32IMZBS-NEXT: xor a5, a5, a2
+; RV32IMZBS-NEXT: or a2, a7, a4
+; RV32IMZBS-NEXT: or a3, a3, t0
+; RV32IMZBS-NEXT: srli a4, a6, 8
+; RV32IMZBS-NEXT: srli a7, a6, 24
+; RV32IMZBS-NEXT: slli t0, a6, 24
+; RV32IMZBS-NEXT: lw t2, 740(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a6, a6, t2
+; RV32IMZBS-NEXT: and a4, a4, t2
+; RV32IMZBS-NEXT: or a4, a4, a7
+; RV32IMZBS-NEXT: srli a7, a5, 8
+; RV32IMZBS-NEXT: slli a6, a6, 8
+; RV32IMZBS-NEXT: or a6, t0, a6
+; RV32IMZBS-NEXT: srli t0, a5, 24
+; RV32IMZBS-NEXT: and a7, a7, t2
+; RV32IMZBS-NEXT: or a7, a7, t0
+; RV32IMZBS-NEXT: slli t0, a5, 24
+; RV32IMZBS-NEXT: and a5, a5, t2
+; RV32IMZBS-NEXT: slli a5, a5, 8
+; RV32IMZBS-NEXT: or a5, t0, a5
+; RV32IMZBS-NEXT: or a6, a6, a4
+; RV32IMZBS-NEXT: lui a4, 349525
+; RV32IMZBS-NEXT: addi a4, a4, 1364
+; RV32IMZBS-NEXT: or a5, a5, a7
; RV32IMZBS-NEXT: srli a7, a6, 4
-; RV32IMZBS-NEXT: and a6, a6, s5
-; RV32IMZBS-NEXT: and a7, a7, s5
+; RV32IMZBS-NEXT: and a6, a6, s4
+; RV32IMZBS-NEXT: and a7, a7, s4
; RV32IMZBS-NEXT: slli a6, a6, 4
; RV32IMZBS-NEXT: or a6, a7, a6
-; RV32IMZBS-NEXT: srli a7, a1, 4
-; RV32IMZBS-NEXT: and a1, a1, s5
-; RV32IMZBS-NEXT: and a7, a7, s5
-; RV32IMZBS-NEXT: slli a1, a1, 4
-; RV32IMZBS-NEXT: or a1, a7, a1
+; RV32IMZBS-NEXT: srli a7, a5, 4
+; RV32IMZBS-NEXT: and a5, a5, s4
+; RV32IMZBS-NEXT: and a7, a7, s4
+; RV32IMZBS-NEXT: slli a5, a5, 4
+; RV32IMZBS-NEXT: or a5, a7, a5
; RV32IMZBS-NEXT: srli a7, a6, 2
-; RV32IMZBS-NEXT: and a6, a6, t6
-; RV32IMZBS-NEXT: and a7, a7, t6
+; RV32IMZBS-NEXT: and a6, a6, s5
+; RV32IMZBS-NEXT: and a7, a7, s5
; RV32IMZBS-NEXT: slli a6, a6, 2
; RV32IMZBS-NEXT: or a6, a7, a6
-; RV32IMZBS-NEXT: srli a7, a1, 2
-; RV32IMZBS-NEXT: and a1, a1, t6
-; RV32IMZBS-NEXT: and a7, a7, t6
-; RV32IMZBS-NEXT: slli a1, a1, 2
-; RV32IMZBS-NEXT: or a1, a7, a1
+; RV32IMZBS-NEXT: srli a7, a5, 2
+; RV32IMZBS-NEXT: and a5, a5, s5
+; RV32IMZBS-NEXT: and a7, a7, s5
+; RV32IMZBS-NEXT: slli a5, a5, 2
+; RV32IMZBS-NEXT: or a5, a7, a5
; RV32IMZBS-NEXT: srli a7, a6, 1
-; RV32IMZBS-NEXT: and a6, a6, t5
-; RV32IMZBS-NEXT: and a7, a7, a0
+; RV32IMZBS-NEXT: and a6, a6, s11
+; RV32IMZBS-NEXT: and a7, a7, a4
; RV32IMZBS-NEXT: slli a6, a6, 1
; RV32IMZBS-NEXT: or a6, a7, a6
-; RV32IMZBS-NEXT: srli a7, a1, 1
-; RV32IMZBS-NEXT: and a1, a1, t5
-; RV32IMZBS-NEXT: and a7, a7, a0
-; RV32IMZBS-NEXT: slli a1, a1, 1
-; RV32IMZBS-NEXT: or a1, a7, a1
+; RV32IMZBS-NEXT: srli a7, a5, 1
+; RV32IMZBS-NEXT: and a5, a5, s11
+; RV32IMZBS-NEXT: and a7, a7, a4
+; RV32IMZBS-NEXT: slli a5, a5, 1
+; RV32IMZBS-NEXT: or a5, a7, a5
; RV32IMZBS-NEXT: srli a6, a6, 1
-; RV32IMZBS-NEXT: srli a1, a1, 1
-; RV32IMZBS-NEXT: lw a7, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a5, a5, 1
+; RV32IMZBS-NEXT: lw a7, 720(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a6, a6, a7
-; RV32IMZBS-NEXT: lw a7, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a7
+; RV32IMZBS-NEXT: lw a7, 716(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, a7
; RV32IMZBS-NEXT: srli a7, a6, 8
; RV32IMZBS-NEXT: srli t0, a6, 24
; RV32IMZBS-NEXT: slli t1, a6, 24
-; RV32IMZBS-NEXT: and a6, a6, t3
-; RV32IMZBS-NEXT: and a7, a7, t3
+; RV32IMZBS-NEXT: and a6, a6, t2
+; RV32IMZBS-NEXT: and a7, a7, t2
; RV32IMZBS-NEXT: or a7, a7, t0
-; RV32IMZBS-NEXT: srli t0, a1, 8
+; RV32IMZBS-NEXT: srli t0, a5, 8
; RV32IMZBS-NEXT: slli a6, a6, 8
; RV32IMZBS-NEXT: or a6, t1, a6
-; RV32IMZBS-NEXT: srli t1, a1, 24
-; RV32IMZBS-NEXT: and t0, t0, t3
+; RV32IMZBS-NEXT: srli t1, a5, 24
+; RV32IMZBS-NEXT: and t0, t0, t2
; RV32IMZBS-NEXT: or t0, t0, t1
-; RV32IMZBS-NEXT: and t1, a1, t3
-; RV32IMZBS-NEXT: slli a1, a1, 24
+; RV32IMZBS-NEXT: and t1, a5, t2
+; RV32IMZBS-NEXT: slli a5, a5, 24
; RV32IMZBS-NEXT: slli t1, t1, 8
-; RV32IMZBS-NEXT: or a1, a1, t1
+; RV32IMZBS-NEXT: or a5, a5, t1
; RV32IMZBS-NEXT: or a6, a6, a7
-; RV32IMZBS-NEXT: or a1, a1, t0
+; RV32IMZBS-NEXT: or a5, a5, t0
; RV32IMZBS-NEXT: srli a7, a6, 4
-; RV32IMZBS-NEXT: and a6, a6, s5
-; RV32IMZBS-NEXT: srli t0, a1, 4
-; RV32IMZBS-NEXT: and a1, a1, s5
-; RV32IMZBS-NEXT: and a7, a7, s5
-; RV32IMZBS-NEXT: and t0, t0, s5
+; RV32IMZBS-NEXT: and a6, a6, s4
+; RV32IMZBS-NEXT: srli t0, a5, 4
+; RV32IMZBS-NEXT: and a5, a5, s4
+; RV32IMZBS-NEXT: and a7, a7, s4
+; RV32IMZBS-NEXT: and t0, t0, s4
; RV32IMZBS-NEXT: slli a6, a6, 4
-; RV32IMZBS-NEXT: slli a1, a1, 4
+; RV32IMZBS-NEXT: slli a5, a5, 4
; RV32IMZBS-NEXT: or a6, a7, a6
-; RV32IMZBS-NEXT: or a1, t0, a1
+; RV32IMZBS-NEXT: or a5, t0, a5
; RV32IMZBS-NEXT: srli a7, a6, 2
-; RV32IMZBS-NEXT: and a6, a6, t6
-; RV32IMZBS-NEXT: srli t0, a1, 2
-; RV32IMZBS-NEXT: and a1, a1, t6
-; RV32IMZBS-NEXT: and a7, a7, t6
-; RV32IMZBS-NEXT: and t0, t0, t6
+; RV32IMZBS-NEXT: and a6, a6, s5
+; RV32IMZBS-NEXT: srli t0, a5, 2
+; RV32IMZBS-NEXT: and a5, a5, s5
+; RV32IMZBS-NEXT: and a7, a7, s5
+; RV32IMZBS-NEXT: and t0, t0, s5
; RV32IMZBS-NEXT: slli a6, a6, 2
; RV32IMZBS-NEXT: or a6, a7, a6
-; RV32IMZBS-NEXT: slli a1, a1, 2
-; RV32IMZBS-NEXT: or a1, t0, a1
+; RV32IMZBS-NEXT: slli a5, a5, 2
+; RV32IMZBS-NEXT: or a5, t0, a5
; RV32IMZBS-NEXT: srli a7, a6, 1
-; RV32IMZBS-NEXT: and a6, a6, t5
-; RV32IMZBS-NEXT: and t0, a1, t5
-; RV32IMZBS-NEXT: srli a1, a1, 1
-; RV32IMZBS-NEXT: and a7, a7, a0
-; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: and a6, a6, s11
+; RV32IMZBS-NEXT: and t0, a5, s11
+; RV32IMZBS-NEXT: srli a5, a5, 1
+; RV32IMZBS-NEXT: and a7, a7, a4
+; RV32IMZBS-NEXT: and a4, a5, a4
; RV32IMZBS-NEXT: slli a6, a6, 1
-; RV32IMZBS-NEXT: or a1, a7, a6
+; RV32IMZBS-NEXT: or a5, a7, a6
; RV32IMZBS-NEXT: slli t0, t0, 1
-; RV32IMZBS-NEXT: or a0, a0, t0
-; RV32IMZBS-NEXT: slli a3, a3, 31
-; RV32IMZBS-NEXT: srli a1, a1, 1
-; RV32IMZBS-NEXT: or a1, a1, a3
-; RV32IMZBS-NEXT: slli a2, a2, 31
-; RV32IMZBS-NEXT: srli a0, a0, 1
-; RV32IMZBS-NEXT: or a0, a0, a2
-; RV32IMZBS-NEXT: srli a4, a4, 1
+; RV32IMZBS-NEXT: or a4, a4, t0
+; RV32IMZBS-NEXT: slli a1, a1, 31
; RV32IMZBS-NEXT: srli a5, a5, 1
-; RV32IMZBS-NEXT: lw a2, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: sw a1, 0(a2)
-; RV32IMZBS-NEXT: sw a4, 4(a2)
-; RV32IMZBS-NEXT: sw a0, 8(a2)
-; RV32IMZBS-NEXT: sw a5, 12(a2)
-; RV32IMZBS-NEXT: lw a2, 644(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: sw a1, 0(a2)
-; RV32IMZBS-NEXT: sw a4, 4(a2)
-; RV32IMZBS-NEXT: sw a0, 8(a2)
-; RV32IMZBS-NEXT: sw a5, 12(a2)
-; RV32IMZBS-NEXT: lw ra, 748(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s0, 744(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s1, 740(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s2, 736(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s3, 732(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s4, 728(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s5, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s8, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s9, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s10, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s11, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: addi sp, sp, 752
+; RV32IMZBS-NEXT: or a1, a5, a1
+; RV32IMZBS-NEXT: slli a0, a0, 31
+; RV32IMZBS-NEXT: srli a4, a4, 1
+; RV32IMZBS-NEXT: or a0, a4, a0
+; RV32IMZBS-NEXT: srli a2, a2, 1
+; RV32IMZBS-NEXT: srli a3, a3, 1
+; RV32IMZBS-NEXT: lw a4, 732(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a1, 0(a4)
+; RV32IMZBS-NEXT: sw a2, 4(a4)
+; RV32IMZBS-NEXT: sw a0, 8(a4)
+; RV32IMZBS-NEXT: sw a3, 12(a4)
+; RV32IMZBS-NEXT: lw a4, 736(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a1, 0(a4)
+; RV32IMZBS-NEXT: sw a2, 4(a4)
+; RV32IMZBS-NEXT: sw a0, 8(a4)
+; RV32IMZBS-NEXT: sw a3, 12(a4)
+; RV32IMZBS-NEXT: lw ra, 796(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 792(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 788(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 784(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s3, 780(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s4, 776(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s5, 772(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s6, 768(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 764(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 760(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 756(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s10, 752(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s11, 748(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: addi sp, sp, 800
; RV32IMZBS-NEXT: ret
;
; RV64IMZBS-LABEL: commutative_clmulh_v2i64:
; RV64IMZBS: # %bb.0:
-; RV64IMZBS-NEXT: addi sp, sp, -1200
-; RV64IMZBS-NEXT: sd ra, 1192(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s0, 1184(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s1, 1176(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s2, 1168(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s3, 1160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s4, 1152(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s5, 1144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s6, 1136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s7, 1128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s8, 1120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s9, 1112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s10, 1104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s11, 1096(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd a5, 992(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd a4, 984(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s5, a0
-; RV64IMZBS-NEXT: srli a4, a2, 24
-; RV64IMZBS-NEXT: lui s9, 4080
+; RV64IMZBS-NEXT: addi sp, sp, -1088
+; RV64IMZBS-NEXT: sd ra, 1080(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s0, 1072(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 1064(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 1056(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s3, 1048(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s4, 1040(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s5, 1032(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s6, 1024(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s7, 1016(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s8, 1008(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s9, 1000(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s10, 992(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s11, 984(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd a5, 976(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd a4, 968(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: mv t5, a0
+; RV64IMZBS-NEXT: srli a5, a2, 24
+; RV64IMZBS-NEXT: lui s8, 4080
; RV64IMZBS-NEXT: srli a7, a2, 8
-; RV64IMZBS-NEXT: li t4, 255
-; RV64IMZBS-NEXT: srli a5, a2, 40
-; RV64IMZBS-NEXT: lui s10, 16
+; RV64IMZBS-NEXT: li t3, 255
+; RV64IMZBS-NEXT: srli a4, a2, 40
+; RV64IMZBS-NEXT: lui a6, 16
; RV64IMZBS-NEXT: srli t0, a2, 56
; RV64IMZBS-NEXT: srliw t1, a2, 24
; RV64IMZBS-NEXT: slli a0, a2, 56
-; RV64IMZBS-NEXT: sd a0, 1080(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui t3, 61681
-; RV64IMZBS-NEXT: lui t5, 209715
-; RV64IMZBS-NEXT: lui s11, 349525
-; RV64IMZBS-NEXT: srli s2, s5, 24
-; RV64IMZBS-NEXT: srli s0, s5, 8
-; RV64IMZBS-NEXT: srli t2, s5, 40
-; RV64IMZBS-NEXT: srli a6, s5, 56
-; RV64IMZBS-NEXT: srliw s4, s5, 24
-; RV64IMZBS-NEXT: slli t6, s5, 56
-; RV64IMZBS-NEXT: srli s6, a3, 24
-; RV64IMZBS-NEXT: srli s8, a3, 8
-; RV64IMZBS-NEXT: srli s3, a3, 40
-; RV64IMZBS-NEXT: srli s7, a3, 56
-; RV64IMZBS-NEXT: and a4, a4, s9
-; RV64IMZBS-NEXT: slli s1, t4, 24
-; RV64IMZBS-NEXT: and a7, a7, s1
-; RV64IMZBS-NEXT: or a7, a7, a4
-; RV64IMZBS-NEXT: addi a0, s10, -256
-; RV64IMZBS-NEXT: and a4, a5, a0
-; RV64IMZBS-NEXT: or a5, a4, t0
-; RV64IMZBS-NEXT: and a4, a2, s9
+; RV64IMZBS-NEXT: sd a0, 920(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: lui t6, 61681
+; RV64IMZBS-NEXT: lui s2, 209715
+; RV64IMZBS-NEXT: lui t4, 349525
+; RV64IMZBS-NEXT: srli t2, t5, 24
+; RV64IMZBS-NEXT: srli s0, t5, 8
+; RV64IMZBS-NEXT: srli s1, t5, 40
+; RV64IMZBS-NEXT: srli s3, t5, 56
+; RV64IMZBS-NEXT: srliw s4, t5, 24
+; RV64IMZBS-NEXT: slli a0, t5, 56
+; RV64IMZBS-NEXT: sd a0, 912(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: srli s7, a3, 24
+; RV64IMZBS-NEXT: srli ra, a3, 8
+; RV64IMZBS-NEXT: srli s6, a3, 40
+; RV64IMZBS-NEXT: srli s11, a3, 56
+; RV64IMZBS-NEXT: and a5, a5, s8
+; RV64IMZBS-NEXT: slli t3, t3, 24
+; RV64IMZBS-NEXT: and a7, a7, t3
+; RV64IMZBS-NEXT: or a5, a7, a5
+; RV64IMZBS-NEXT: srliw a0, a3, 24
+; RV64IMZBS-NEXT: addi s5, a6, -256
+; RV64IMZBS-NEXT: and a4, a4, s5
+; RV64IMZBS-NEXT: or a6, a4, t0
+; RV64IMZBS-NEXT: and a4, a2, s8
; RV64IMZBS-NEXT: slli t1, t1, 32
-; RV64IMZBS-NEXT: addi t0, t3, -241
-; RV64IMZBS-NEXT: addi t3, t5, 819
-; RV64IMZBS-NEXT: addi t4, s11, 1365
+; RV64IMZBS-NEXT: addi s10, t6, -241
+; RV64IMZBS-NEXT: addi a7, s2, 819
+; RV64IMZBS-NEXT: addi t0, t4, 1365
; RV64IMZBS-NEXT: slli a4, a4, 24
; RV64IMZBS-NEXT: or a4, a4, t1
-; RV64IMZBS-NEXT: slli t1, t0, 32
-; RV64IMZBS-NEXT: add ra, t0, t1
-; RV64IMZBS-NEXT: slli t0, t3, 32
-; RV64IMZBS-NEXT: add s11, t3, t0
-; RV64IMZBS-NEXT: slli t0, t4, 32
-; RV64IMZBS-NEXT: add s10, t4, t0
-; RV64IMZBS-NEXT: srliw t1, a3, 24
-; RV64IMZBS-NEXT: and t0, s2, s9
-; RV64IMZBS-NEXT: and t3, s0, s1
-; RV64IMZBS-NEXT: or t0, t3, t0
-; RV64IMZBS-NEXT: srli t3, a1, 24
-; RV64IMZBS-NEXT: and t2, t2, a0
-; RV64IMZBS-NEXT: or t2, t2, a6
-; RV64IMZBS-NEXT: and a6, s5, s9
+; RV64IMZBS-NEXT: slli t1, s10, 32
+; RV64IMZBS-NEXT: add s10, s10, t1
+; RV64IMZBS-NEXT: slli t1, a7, 32
+; RV64IMZBS-NEXT: add t4, a7, t1
+; RV64IMZBS-NEXT: slli a7, t0, 32
+; RV64IMZBS-NEXT: add s9, t0, a7
+; RV64IMZBS-NEXT: slli a7, a3, 56
+; RV64IMZBS-NEXT: and t0, t2, s8
+; RV64IMZBS-NEXT: and t1, s0, t3
+; RV64IMZBS-NEXT: or t0, t1, t0
+; RV64IMZBS-NEXT: srli t1, a1, 24
+; RV64IMZBS-NEXT: and t2, s1, s5
+; RV64IMZBS-NEXT: or t2, t2, s3
+; RV64IMZBS-NEXT: and t6, t5, s8
; RV64IMZBS-NEXT: slli s4, s4, 32
-; RV64IMZBS-NEXT: slli a6, a6, 24
-; RV64IMZBS-NEXT: or a6, a6, s4
-; RV64IMZBS-NEXT: srli t4, a1, 8
-; RV64IMZBS-NEXT: and t5, s6, s9
-; RV64IMZBS-NEXT: mv s4, s1
-; RV64IMZBS-NEXT: sd s1, 976(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s0, s8, s1
-; RV64IMZBS-NEXT: or t5, s0, t5
-; RV64IMZBS-NEXT: srli s0, a1, 40
-; RV64IMZBS-NEXT: and s1, s3, a0
-; RV64IMZBS-NEXT: or s1, s1, s7
-; RV64IMZBS-NEXT: and s2, a3, s9
-; RV64IMZBS-NEXT: slli t1, t1, 32
-; RV64IMZBS-NEXT: slli s2, s2, 24
-; RV64IMZBS-NEXT: or t1, s2, t1
-; RV64IMZBS-NEXT: srli s2, a1, 56
-; RV64IMZBS-NEXT: and t3, t3, s9
-; RV64IMZBS-NEXT: and t4, t4, s4
-; RV64IMZBS-NEXT: or t3, t4, t3
-; RV64IMZBS-NEXT: srliw t4, a1, 24
-; RV64IMZBS-NEXT: and s0, s0, a0
-; RV64IMZBS-NEXT: or s0, s0, s2
-; RV64IMZBS-NEXT: and s2, a1, s9
-; RV64IMZBS-NEXT: slli t4, t4, 32
-; RV64IMZBS-NEXT: slli s2, s2, 24
-; RV64IMZBS-NEXT: or t4, s2, t4
-; RV64IMZBS-NEXT: bseti s2, zero, 11
-; RV64IMZBS-NEXT: sd s2, 1088(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: or a5, a7, a5
-; RV64IMZBS-NEXT: mv s2, a0
-; RV64IMZBS-NEXT: sd a0, 960(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a2, a0
+; RV64IMZBS-NEXT: slli t6, t6, 24
+; RV64IMZBS-NEXT: or t6, t6, s4
+; RV64IMZBS-NEXT: srli s0, a1, 8
+; RV64IMZBS-NEXT: and s1, s7, s8
+; RV64IMZBS-NEXT: mv s7, t3
+; RV64IMZBS-NEXT: sd t3, 960(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: and s2, ra, t3
+; RV64IMZBS-NEXT: or s1, s2, s1
+; RV64IMZBS-NEXT: srli s2, a1, 40
+; RV64IMZBS-NEXT: and s3, s6, s5
+; RV64IMZBS-NEXT: or s3, s3, s11
+; RV64IMZBS-NEXT: and s4, a3, s8
+; RV64IMZBS-NEXT: slli t3, a0, 32
+; RV64IMZBS-NEXT: slli s4, s4, 24
+; RV64IMZBS-NEXT: or t3, s4, t3
+; RV64IMZBS-NEXT: srli s4, a1, 56
+; RV64IMZBS-NEXT: and t1, t1, s8
+; RV64IMZBS-NEXT: lui a0, 4080
+; RV64IMZBS-NEXT: and s0, s0, s7
+; RV64IMZBS-NEXT: or t1, s0, t1
+; RV64IMZBS-NEXT: srliw s0, a1, 24
+; RV64IMZBS-NEXT: and s2, s2, s5
+; RV64IMZBS-NEXT: or s2, s2, s4
+; RV64IMZBS-NEXT: and s4, a1, a0
+; RV64IMZBS-NEXT: slli s0, s0, 32
+; RV64IMZBS-NEXT: slli s4, s4, 24
+; RV64IMZBS-NEXT: or s0, s4, s0
+; RV64IMZBS-NEXT: slli s4, a1, 56
+; RV64IMZBS-NEXT: sd s5, 944(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: and a2, a2, s5
+; RV64IMZBS-NEXT: and a0, t5, s5
+; RV64IMZBS-NEXT: and a3, a3, s5
+; RV64IMZBS-NEXT: and a1, a1, s5
; RV64IMZBS-NEXT: slli a2, a2, 40
-; RV64IMZBS-NEXT: ld a0, 1080(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: or a2, a0, a2
-; RV64IMZBS-NEXT: or a7, t0, t2
-; RV64IMZBS-NEXT: and a0, s5, s2
; RV64IMZBS-NEXT: slli a0, a0, 40
-; RV64IMZBS-NEXT: or a0, t6, a0
-; RV64IMZBS-NEXT: or t0, t5, s1
-; RV64IMZBS-NEXT: slli t2, a3, 56
-; RV64IMZBS-NEXT: and a3, a3, s2
; RV64IMZBS-NEXT: slli a3, a3, 40
-; RV64IMZBS-NEXT: or a3, t2, a3
-; RV64IMZBS-NEXT: or t2, t3, s0
-; RV64IMZBS-NEXT: slli t3, a1, 56
-; RV64IMZBS-NEXT: and a1, a1, s2
; RV64IMZBS-NEXT: slli a1, a1, 40
-; RV64IMZBS-NEXT: or a1, t3, a1
+; RV64IMZBS-NEXT: or a5, a5, a6
+; RV64IMZBS-NEXT: ld a6, 920(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: or a2, a6, a2
+; RV64IMZBS-NEXT: or a6, t0, t2
+; RV64IMZBS-NEXT: ld t0, 912(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: or a0, t0, a0
+; RV64IMZBS-NEXT: or t0, s1, s3
+; RV64IMZBS-NEXT: or a3, a7, a3
+; RV64IMZBS-NEXT: or a7, t1, s2
+; RV64IMZBS-NEXT: or a1, s4, a1
; RV64IMZBS-NEXT: or a2, a2, a4
-; RV64IMZBS-NEXT: or a0, a0, a6
-; RV64IMZBS-NEXT: or a3, a3, t1
-; RV64IMZBS-NEXT: or a1, a1, t4
-; RV64IMZBS-NEXT: lui s0, 1024
+; RV64IMZBS-NEXT: or a0, a0, t6
+; RV64IMZBS-NEXT: or a3, a3, t3
+; RV64IMZBS-NEXT: or a1, a1, s0
; RV64IMZBS-NEXT: or a2, a2, a5
-; RV64IMZBS-NEXT: or a0, a0, a7
+; RV64IMZBS-NEXT: or a0, a0, a6
; RV64IMZBS-NEXT: or a3, a3, t0
-; RV64IMZBS-NEXT: or a1, a1, t2
+; RV64IMZBS-NEXT: or a1, a1, a7
; RV64IMZBS-NEXT: srli a4, a2, 4
-; RV64IMZBS-NEXT: sd ra, 968(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a2, ra
-; RV64IMZBS-NEXT: and a4, a4, ra
+; RV64IMZBS-NEXT: sd s10, 952(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: and a2, a2, s10
+; RV64IMZBS-NEXT: srli a5, a0, 4
+; RV64IMZBS-NEXT: and a0, a0, s10
+; RV64IMZBS-NEXT: srli a6, a3, 4
+; RV64IMZBS-NEXT: and a3, a3, s10
+; RV64IMZBS-NEXT: srli a7, a1, 4
+; RV64IMZBS-NEXT: and a1, a1, s10
+; RV64IMZBS-NEXT: and a4, a4, s10
; RV64IMZBS-NEXT: slli a2, a2, 4
-; RV64IMZBS-NEXT: or a2, a4, a2
-; RV64IMZBS-NEXT: srli a4, a0, 4
-; RV64IMZBS-NEXT: and a0, a0, ra
-; RV64IMZBS-NEXT: and a4, a4, ra
+; RV64IMZBS-NEXT: and a5, a5, s10
; RV64IMZBS-NEXT: slli a0, a0, 4
-; RV64IMZBS-NEXT: or a0, a4, a0
-; RV64IMZBS-NEXT: srli a4, a3, 4
-; RV64IMZBS-NEXT: and a3, a3, ra
-; RV64IMZBS-NEXT: and a4, a4, ra
+; RV64IMZBS-NEXT: and a6, a6, s10
; RV64IMZBS-NEXT: slli a3, a3, 4
-; RV64IMZBS-NEXT: or a3, a4, a3
-; RV64IMZBS-NEXT: srli a4, a1, 4
-; RV64IMZBS-NEXT: and a1, a1, ra
-; RV64IMZBS-NEXT: and a4, a4, ra
+; RV64IMZBS-NEXT: and a7, a7, s10
; RV64IMZBS-NEXT: slli a1, a1, 4
-; RV64IMZBS-NEXT: or a1, a4, a1
+; RV64IMZBS-NEXT: or a2, a4, a2
+; RV64IMZBS-NEXT: or a0, a5, a0
+; RV64IMZBS-NEXT: or a3, a6, a3
+; RV64IMZBS-NEXT: or a1, a7, a1
; RV64IMZBS-NEXT: srli a4, a2, 2
-; RV64IMZBS-NEXT: sd s11, 952(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a2, s11
-; RV64IMZBS-NEXT: and a4, a4, s11
+; RV64IMZBS-NEXT: sd t4, 936(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: and a2, a2, t4
+; RV64IMZBS-NEXT: srli a5, a0, 2
+; RV64IMZBS-NEXT: and a0, a0, t4
+; RV64IMZBS-NEXT: srli a6, a3, 2
+; RV64IMZBS-NEXT: and a3, a3, t4
+; RV64IMZBS-NEXT: srli a7, a1, 2
+; RV64IMZBS-NEXT: and a1, a1, t4
+; RV64IMZBS-NEXT: and a4, a4, t4
; RV64IMZBS-NEXT: slli a2, a2, 2
-; RV64IMZBS-NEXT: or a2, a4, a2
-; RV64IMZBS-NEXT: srli a4, a0, 2
-; RV64IMZBS-NEXT: and a0, a0, s11
-; RV64IMZBS-NEXT: and a4, a4, s11
+; RV64IMZBS-NEXT: and a5, a5, t4
; RV64IMZBS-NEXT: slli a0, a0, 2
-; RV64IMZBS-NEXT: or a0, a4, a0
-; RV64IMZBS-NEXT: srli a4, a3, 2
-; RV64IMZBS-NEXT: and a3, a3, s11
-; RV64IMZBS-NEXT: and a4, a4, s11
+; RV64IMZBS-NEXT: and a6, a6, t4
; RV64IMZBS-NEXT: slli a3, a3, 2
-; RV64IMZBS-NEXT: or a4, a4, a3
-; RV64IMZBS-NEXT: srli a3, a1, 2
-; RV64IMZBS-NEXT: and a1, a1, s11
-; RV64IMZBS-NEXT: and a3, a3, s11
+; RV64IMZBS-NEXT: and a7, a7, t4
; RV64IMZBS-NEXT: slli a1, a1, 2
-; RV64IMZBS-NEXT: or a1, a3, a1
-; RV64IMZBS-NEXT: srli a3, a2, 1
-; RV64IMZBS-NEXT: sd s10, 944(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a2, s10
-; RV64IMZBS-NEXT: and a3, a3, s10
-; RV64IMZBS-NEXT: slli a2, a2, 1
-; RV64IMZBS-NEXT: or s5, a3, a2
-; RV64IMZBS-NEXT: srli a2, a0, 1
-; RV64IMZBS-NEXT: and a0, a0, s10
-; RV64IMZBS-NEXT: and a2, a2, s10
-; RV64IMZBS-NEXT: slli a0, a0, 1
-; RV64IMZBS-NEXT: or a3, a2, a0
-; RV64IMZBS-NEXT: srli a0, a4, 1
-; RV64IMZBS-NEXT: and a2, a4, s10
-; RV64IMZBS-NEXT: and a0, a0, s10
+; RV64IMZBS-NEXT: or a2, a4, a2
+; RV64IMZBS-NEXT: or a0, a5, a0
+; RV64IMZBS-NEXT: or a3, a6, a3
+; RV64IMZBS-NEXT: or a1, a7, a1
+; RV64IMZBS-NEXT: srli a4, a2, 1
+; RV64IMZBS-NEXT: sd s9, 928(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: and a2, a2, s9
+; RV64IMZBS-NEXT: srli a5, a0, 1
+; RV64IMZBS-NEXT: and a0, a0, s9
+; RV64IMZBS-NEXT: srli a6, a3, 1
+; RV64IMZBS-NEXT: and a3, a3, s9
+; RV64IMZBS-NEXT: srli a7, a1, 1
+; RV64IMZBS-NEXT: and a1, a1, s9
+; RV64IMZBS-NEXT: and a4, a4, s9
; RV64IMZBS-NEXT: slli a2, a2, 1
-; RV64IMZBS-NEXT: or a0, a0, a2
-; RV64IMZBS-NEXT: srli a2, a1, 1
-; RV64IMZBS-NEXT: and a1, a1, s10
-; RV64IMZBS-NEXT: and a2, a2, s10
-; RV64IMZBS-NEXT: slli a1, a1, 1
-; RV64IMZBS-NEXT: or s11, a2, a1
-; RV64IMZBS-NEXT: bseti a1, zero, 31
-; RV64IMZBS-NEXT: sd a1, 1008(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 32
-; RV64IMZBS-NEXT: sd a1, 1016(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 33
-; RV64IMZBS-NEXT: sd a1, 1024(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 34
-; RV64IMZBS-NEXT: sd a1, 1032(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 35
-; RV64IMZBS-NEXT: sd a1, 1040(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 36
-; RV64IMZBS-NEXT: sd a1, 1048(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 37
-; RV64IMZBS-NEXT: sd a1, 1056(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 38
-; RV64IMZBS-NEXT: sd a1, 1064(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 39
-; RV64IMZBS-NEXT: sd a1, 1072(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 40
-; RV64IMZBS-NEXT: sd a1, 1080(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 41
-; RV64IMZBS-NEXT: sd a1, 1000(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti ra, zero, 42
-; RV64IMZBS-NEXT: sd ra, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s10, zero, 43
-; RV64IMZBS-NEXT: sd s10, 416(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s9, zero, 44
-; RV64IMZBS-NEXT: sd s9, 424(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s8, zero, 45
-; RV64IMZBS-NEXT: sd s8, 432(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s7, zero, 46
-; RV64IMZBS-NEXT: sd s7, 440(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti s6, zero, 47
-; RV64IMZBS-NEXT: bseti s4, zero, 48
-; RV64IMZBS-NEXT: bseti s3, zero, 49
-; RV64IMZBS-NEXT: bseti s2, zero, 50
-; RV64IMZBS-NEXT: bseti s1, zero, 51
-; RV64IMZBS-NEXT: bseti t6, zero, 52
-; RV64IMZBS-NEXT: bseti t5, zero, 53
-; RV64IMZBS-NEXT: bseti t4, zero, 54
-; RV64IMZBS-NEXT: bseti t3, zero, 55
-; RV64IMZBS-NEXT: bseti t2, zero, 56
-; RV64IMZBS-NEXT: bseti t1, zero, 57
-; RV64IMZBS-NEXT: bseti t0, zero, 58
-; RV64IMZBS-NEXT: bseti a7, zero, 59
-; RV64IMZBS-NEXT: bseti a5, zero, 60
-; RV64IMZBS-NEXT: bseti a4, zero, 61
-; RV64IMZBS-NEXT: bseti a2, zero, 62
-; RV64IMZBS-NEXT: bseti a1, zero, 63
-; RV64IMZBS-NEXT: andi a6, a3, 2
-; RV64IMZBS-NEXT: sd a6, 936(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a3, 1
-; RV64IMZBS-NEXT: sd a6, 928(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a3, 4
-; RV64IMZBS-NEXT: sd a6, 920(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a3, 8
-; RV64IMZBS-NEXT: sd a6, 912(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a3, 16
-; RV64IMZBS-NEXT: sd a6, 904(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a3, 32
-; RV64IMZBS-NEXT: sd a6, 896(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a3, 64
-; RV64IMZBS-NEXT: sd a6, 888(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a3, 128
-; RV64IMZBS-NEXT: sd a6, 880(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a3, 256
-; RV64IMZBS-NEXT: sd a6, 872(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a3, 512
-; RV64IMZBS-NEXT: sd a6, 864(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a3, 1024
-; RV64IMZBS-NEXT: sd a6, 856(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1088(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 848(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 1
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 840(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 2
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 832(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 4
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 824(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 8
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 816(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 16
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 808(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 32
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 800(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 64
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 792(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 128
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 776(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 256
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 768(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 512
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 752(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s0, a3, s0
-; RV64IMZBS-NEXT: sd s0, 744(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui s0, 2048
-; RV64IMZBS-NEXT: and s0, a3, s0
-; RV64IMZBS-NEXT: lui a6, 4096
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 736(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 8192
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 728(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 16384
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 712(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 32768
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 696(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 65536
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 680(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 131072
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 664(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a6, 262144
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 624(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1008(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 592(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1016(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 576(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1024(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 560(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1032(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 520(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1040(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 496(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1048(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 488(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1056(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 480(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1064(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 472(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1072(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 464(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1080(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 456(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a6, 1000(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, a3, a6
-; RV64IMZBS-NEXT: sd a6, 448(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a6, a3, ra
-; RV64IMZBS-NEXT: and s10, a3, s10
-; RV64IMZBS-NEXT: sd s10, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s9, a3, s9
-; RV64IMZBS-NEXT: sd s9, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s8, a3, s8
-; RV64IMZBS-NEXT: sd s8, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s7, a3, s7
-; RV64IMZBS-NEXT: sd s7, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s7, a3, s6
-; RV64IMZBS-NEXT: sd s7, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv ra, s6
-; RV64IMZBS-NEXT: and s6, a3, s4
-; RV64IMZBS-NEXT: sd s6, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s10, s4
-; RV64IMZBS-NEXT: and s4, a3, s3
-; RV64IMZBS-NEXT: sd s4, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s9, s3
-; RV64IMZBS-NEXT: and s3, a3, s2
-; RV64IMZBS-NEXT: sd s3, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s8, s2
-; RV64IMZBS-NEXT: and s2, a3, s1
-; RV64IMZBS-NEXT: sd s2, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s7, s1
-; RV64IMZBS-NEXT: and s1, a3, t6
-; RV64IMZBS-NEXT: sd s1, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s6, t6
-; RV64IMZBS-NEXT: and t6, a3, t5
-; RV64IMZBS-NEXT: sd t6, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s4, t5
-; RV64IMZBS-NEXT: and t5, a3, t4
-; RV64IMZBS-NEXT: sd t5, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s3, t4
-; RV64IMZBS-NEXT: and t4, a3, t3
-; RV64IMZBS-NEXT: sd t4, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s2, t3
-; RV64IMZBS-NEXT: and t3, a3, t2
-; RV64IMZBS-NEXT: mv s1, t2
-; RV64IMZBS-NEXT: and t2, a3, t1
-; RV64IMZBS-NEXT: sd t2, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv t4, t1
-; RV64IMZBS-NEXT: and t1, a3, t0
-; RV64IMZBS-NEXT: sd t1, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv t2, t0
-; RV64IMZBS-NEXT: and t0, a3, a7
-; RV64IMZBS-NEXT: sd t0, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv t1, a7
-; RV64IMZBS-NEXT: and a7, a3, a5
-; RV64IMZBS-NEXT: sd a7, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv t0, a5
-; RV64IMZBS-NEXT: and a5, a3, a4
-; RV64IMZBS-NEXT: sd a5, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv a7, a4
-; RV64IMZBS-NEXT: and a4, a3, a2
-; RV64IMZBS-NEXT: sd a4, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv a4, a2
-; RV64IMZBS-NEXT: and a3, a3, a1
-; RV64IMZBS-NEXT: mv a2, a1
-; RV64IMZBS-NEXT: ld a1, 936(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 648(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t5, s5, a1
-; RV64IMZBS-NEXT: ld a1, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t6, s5, a1
-; RV64IMZBS-NEXT: ld a1, 912(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 640(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 904(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 632(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 896(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 616(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 720(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 888(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 872(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 608(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 864(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 600(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 704(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 848(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 784(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a5, s5, a1
-; RV64IMZBS-NEXT: sd a5, 864(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 832(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 584(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 824(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 568(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 816(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 688(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 808(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 760(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 800(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 840(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 792(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
+; RV64IMZBS-NEXT: and a5, a5, s9
+; RV64IMZBS-NEXT: slli t0, a0, 1
+; RV64IMZBS-NEXT: and a6, a6, s9
+; RV64IMZBS-NEXT: slli a3, a3, 1
+; RV64IMZBS-NEXT: and a7, a7, s9
+; RV64IMZBS-NEXT: slli a0, a1, 1
+; RV64IMZBS-NEXT: or t4, a4, a2
+; RV64IMZBS-NEXT: or t5, a5, t0
+; RV64IMZBS-NEXT: srli a2, t0, 63
+; RV64IMZBS-NEXT: or t0, a6, a3
+; RV64IMZBS-NEXT: or s5, a7, a0
+; RV64IMZBS-NEXT: srli a0, a0, 63
+; RV64IMZBS-NEXT: slli a3, t4, 1
+; RV64IMZBS-NEXT: andi a4, t5, 2
+; RV64IMZBS-NEXT: slli a5, t4, 2
+; RV64IMZBS-NEXT: andi a6, t5, 4
+; RV64IMZBS-NEXT: slli a7, t4, 3
+; RV64IMZBS-NEXT: andi t1, t5, 8
+; RV64IMZBS-NEXT: slli t2, t4, 4
+; RV64IMZBS-NEXT: andi t3, t5, 16
+; RV64IMZBS-NEXT: slli t6, t4, 5
+; RV64IMZBS-NEXT: andi s0, t5, 32
+; RV64IMZBS-NEXT: slli s1, t4, 6
+; RV64IMZBS-NEXT: andi s2, t5, 64
+; RV64IMZBS-NEXT: slli s3, t4, 7
+; RV64IMZBS-NEXT: slli s4, t4, 63
+; RV64IMZBS-NEXT: seqz a2, a2
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: and a1, a2, s4
; RV64IMZBS-NEXT: sd a1, 912(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 776(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 552(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 768(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 544(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 752(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 672(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 744(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 752(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, s5, s0
-; RV64IMZBS-NEXT: sd a1, 816(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 736(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 536(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 728(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 528(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 712(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 656(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 696(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 728(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 680(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 808(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 664(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 872(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 624(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 920(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 592(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 936(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 576(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 512(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 560(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 504(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 520(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 592(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 496(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 696(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 488(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 776(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 480(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 832(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 896(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 464(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 928(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 488(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 480(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, s5, a6
-; RV64IMZBS-NEXT: sd a1, 560(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 664(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 744(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 800(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 856(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 904(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 472(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 464(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 520(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 624(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 712(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 768(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 824(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 880(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, s5, t3
-; RV64IMZBS-NEXT: sd a1, 456(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 448(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 496(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 576(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 680(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 736(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 792(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, s5, a3
+; RV64IMZBS-NEXT: slli a2, t0, 63
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 920(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t5, 128
+; RV64IMZBS-NEXT: seqz a2, a4
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 880(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a2, t4, 8
+; RV64IMZBS-NEXT: seqz a3, a6
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 864(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a3, t5, 256
+; RV64IMZBS-NEXT: seqz a4, t1
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: and a1, a4, a7
; RV64IMZBS-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1088(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and s0, s11, a1
-; RV64IMZBS-NEXT: lui a1, 1
-; RV64IMZBS-NEXT: and s5, s11, a1
-; RV64IMZBS-NEXT: lui a1, 2
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1088(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 4
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 8
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 16
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 32
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 64
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 128
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 256
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 512
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 1024
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 2048
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 4096
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 8192
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 16384
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 32768
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 65536
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 131072
-; RV64IMZBS-NEXT: and t3, s11, a1
-; RV64IMZBS-NEXT: lui a1, 262144
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1008(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1008(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1016(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1016(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1024(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1024(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1032(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1032(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1040(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1040(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1048(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1048(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1056(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1056(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1064(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1064(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1072(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1072(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1080(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1000(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a6, s11, a1
-; RV64IMZBS-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 424(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, ra
-; RV64IMZBS-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, s10
-; RV64IMZBS-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, s9
-; RV64IMZBS-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s8, s11, s8
-; RV64IMZBS-NEXT: and a1, s11, s7
-; RV64IMZBS-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, s6
-; RV64IMZBS-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, s4
-; RV64IMZBS-NEXT: sd a1, 200(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s3, s11, s3
-; RV64IMZBS-NEXT: and a1, s11, s2
-; RV64IMZBS-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s1, s11, s1
-; RV64IMZBS-NEXT: and t4, s11, t4
-; RV64IMZBS-NEXT: and a1, s11, t2
-; RV64IMZBS-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, t1
-; RV64IMZBS-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, t0
-; RV64IMZBS-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, a7
-; RV64IMZBS-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, a4
-; RV64IMZBS-NEXT: sd a1, 104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, a2
-; RV64IMZBS-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a1, s11, 2
-; RV64IMZBS-NEXT: andi a2, s11, 1
-; RV64IMZBS-NEXT: andi a3, s11, 4
-; RV64IMZBS-NEXT: andi a4, s11, 8
-; RV64IMZBS-NEXT: andi a5, s11, 16
-; RV64IMZBS-NEXT: andi a7, s11, 32
-; RV64IMZBS-NEXT: andi t0, s11, 64
-; RV64IMZBS-NEXT: andi t1, s11, 128
-; RV64IMZBS-NEXT: andi t2, s11, 256
-; RV64IMZBS-NEXT: andi s2, s11, 512
-; RV64IMZBS-NEXT: andi s7, s11, 1024
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 96(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a2, a0, a2
-; RV64IMZBS-NEXT: mul a3, a0, a3
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: mul a1, a0, a5
-; RV64IMZBS-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, a7
-; RV64IMZBS-NEXT: sd a1, 72(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, t0
-; RV64IMZBS-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, t1
-; RV64IMZBS-NEXT: sd a1, 432(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, t2
-; RV64IMZBS-NEXT: sd a1, 64(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, s2
-; RV64IMZBS-NEXT: sd a1, 56(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, s7
-; RV64IMZBS-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, s0
-; RV64IMZBS-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, s5
-; RV64IMZBS-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1088(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 48(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 40(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 440(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 24(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
+; RV64IMZBS-NEXT: slli a4, t4, 9
+; RV64IMZBS-NEXT: seqz a5, t3
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and t2, a5, t2
+; RV64IMZBS-NEXT: andi a5, t5, 512
+; RV64IMZBS-NEXT: seqz a6, s0
+; RV64IMZBS-NEXT: addi a6, a6, -1
+; RV64IMZBS-NEXT: and t3, a6, t6
+; RV64IMZBS-NEXT: slli a6, t4, 10
+; RV64IMZBS-NEXT: seqz a7, s2
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: and a1, a7, s1
+; RV64IMZBS-NEXT: sd a1, 888(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a7, t5, 1024
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and t6, a0, s3
+; RV64IMZBS-NEXT: slli a0, t4, 11
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and s2, a3, a2
+; RV64IMZBS-NEXT: not a2, t5
+; RV64IMZBS-NEXT: seqz a3, a5
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 840(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a3, t4, 12
+; RV64IMZBS-NEXT: seqz a4, a7
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: and a1, a4, a6
+; RV64IMZBS-NEXT: sd a1, 904(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a4, a2, 11
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: and a6, a4, a0
+; RV64IMZBS-NEXT: bexti a0, a2, 12
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 776(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 13
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 13
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 816(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 14
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 14
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 856(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 15
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 15
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 896(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 16
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 16
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 736(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 17
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 17
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 728(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 18
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 18
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 768(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 19
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 19
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 800(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 20
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 20
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 832(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 21
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 21
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 872(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 22
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 22
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 680(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 23
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 23
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 664(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 24
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 24
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 712(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 25
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 25
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 752(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 26
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 26
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 792(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 27
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 27
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 808(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 28
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 28
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 824(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 29
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 29
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 608(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 30
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 30
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 600(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sraiw a0, t5, 31
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 31
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 648(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 32
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 32
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 672(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 33
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 33
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 704(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 34
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 34
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 744(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 35
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 35
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 760(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 36
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 36
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 784(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 37
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 37
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 552(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 38
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 38
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 544(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 39
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 39
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 568(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 40
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 40
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 592(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 41
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 41
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 640(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 42
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 42
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 656(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 43
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 43
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 688(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 44
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 44
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 696(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 45
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 45
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 720(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 46
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 46
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 496(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 47
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 47
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 488(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 48
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 48
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 520(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 49
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 49
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 536(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 50
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 50
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 560(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 51
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 51
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 576(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 52
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 52
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 584(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 53
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 53
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 616(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 54
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 54
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 624(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 55
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 55
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 632(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 56
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 56
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 464(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 57
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 57
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 456(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 58
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 58
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 472(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 59
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 59
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 480(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 60
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 60
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 504(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 61
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 61
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 512(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t5, 1
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and t5, a0, t4
+; RV64IMZBS-NEXT: slli t4, t4, 62
+; RV64IMZBS-NEXT: bexti a0, a2, 62
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and a0, a0, t4
+; RV64IMZBS-NEXT: sd a0, 528(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 2
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 1
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 448(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 4
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 2
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 432(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 8
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 3
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 408(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 16
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 4
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 400(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 32
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 5
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 376(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 64
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 6
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 440(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 128
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 7
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 360(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 256
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 8
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 344(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 512
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 9
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 392(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 1024
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 10
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 424(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: not a0, s5
+; RV64IMZBS-NEXT: bexti a2, a0, 11
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 11
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 12
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 12
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 296(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 13
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 13
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 14
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 14
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 368(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 15
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 15
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 416(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 16
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 16
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 17
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 17
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 232(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 18
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 18
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 19
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 19
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 320(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 20
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 20
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 352(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 21
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 21
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 384(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 22
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 22
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 192(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 23
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 23
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 168(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 24
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 24
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 224(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 25
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 25
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 26
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 26
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 288(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 27
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 27
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 312(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 28
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 28
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 336(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 29
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 29
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 30
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 30
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 112(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sraiw a2, s5, 31
+; RV64IMZBS-NEXT: seqz a2, a2
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 31
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 152(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 32
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 32
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 184(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 33
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 33
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 34
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 34
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 240(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 35
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 35
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 36
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 36
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 37
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 37
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 56(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 38
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 38
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 48(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 39
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 39
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 72(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 40
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 40
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 96(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 41
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 41
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 144(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 42
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 42
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 160(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 43
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 43
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 44
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 44
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 45
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 45
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 46
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 46
+; RV64IMZBS-NEXT: and s11, a2, a3
+; RV64IMZBS-NEXT: bexti a2, a0, 47
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a4, t0, 47
+; RV64IMZBS-NEXT: and s9, a2, a4
+; RV64IMZBS-NEXT: bexti a2, a0, 48
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 48
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 49
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 49
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 50
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 50
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 64(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 51
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli s0, t0, 51
+; RV64IMZBS-NEXT: and a2, a2, s0
+; RV64IMZBS-NEXT: sd a2, 80(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 52
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli s1, t0, 52
+; RV64IMZBS-NEXT: and a2, a2, s1
+; RV64IMZBS-NEXT: sd a2, 88(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 53
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 53
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 104(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 54
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 54
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 120(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 55
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 55
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 128(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 56
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 56
+; RV64IMZBS-NEXT: and s6, a2, a3
+; RV64IMZBS-NEXT: bexti a3, a0, 57
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, t0, 57
+; RV64IMZBS-NEXT: and s4, a3, a4
+; RV64IMZBS-NEXT: bexti a4, a0, 58
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: slli a5, t0, 58
+; RV64IMZBS-NEXT: and s7, a4, a5
+; RV64IMZBS-NEXT: bexti a5, a0, 59
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: slli a7, t0, 59
+; RV64IMZBS-NEXT: and s8, a5, a7
+; RV64IMZBS-NEXT: bexti a7, a0, 60
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: slli t1, t0, 60
+; RV64IMZBS-NEXT: and s10, a7, t1
+; RV64IMZBS-NEXT: bexti t1, a0, 61
+; RV64IMZBS-NEXT: addi t1, t1, -1
+; RV64IMZBS-NEXT: slli a1, t0, 61
+; RV64IMZBS-NEXT: and a1, t1, a1
; RV64IMZBS-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, t3
-; RV64IMZBS-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 1000(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1008(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 1088(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1016(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s10, a0, a1
-; RV64IMZBS-NEXT: ld a1, 1024(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s9, a0, a1
-; RV64IMZBS-NEXT: ld a1, 1032(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1040(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1048(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1056(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 1032(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1064(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 1064(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1072(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 1080(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s7, a0, a1
-; RV64IMZBS-NEXT: mul s6, a0, a6
-; RV64IMZBS-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 1016(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 1048(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 1072(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s5, a0, a1
-; RV64IMZBS-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s4, a0, a1
-; RV64IMZBS-NEXT: mul a1, a0, s8
-; RV64IMZBS-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 200(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 424(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a0, s3
-; RV64IMZBS-NEXT: sd a1, 1024(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 1056(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul s3, a0, s1
-; RV64IMZBS-NEXT: mul s2, a0, t4
-; RV64IMZBS-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s8, a0, a1
-; RV64IMZBS-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 1008(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a0, a0, a1
-; RV64IMZBS-NEXT: sd a0, 1040(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 648(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s1, t5, a0
-; RV64IMZBS-NEXT: ld a0, 640(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t6, a0
-; RV64IMZBS-NEXT: ld a0, 632(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 616(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, a0, a1
+; RV64IMZBS-NEXT: andi a1, s5, 1
+; RV64IMZBS-NEXT: seqz a1, a1
+; RV64IMZBS-NEXT: addi a1, a1, -1
+; RV64IMZBS-NEXT: and a1, a1, t0
+; RV64IMZBS-NEXT: slli t0, t0, 62
+; RV64IMZBS-NEXT: bexti a0, a0, 62
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and a0, a0, t0
+; RV64IMZBS-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: ld a0, 880(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s3, t5, a0
+; RV64IMZBS-NEXT: ld a0, 864(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 848(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s5, a0, a2
+; RV64IMZBS-NEXT: xor t2, t2, t3
+; RV64IMZBS-NEXT: xor t4, t6, s2
+; RV64IMZBS-NEXT: ld a0, 776(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, a6, a0
+; RV64IMZBS-NEXT: ld a0, 736(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 728(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, a0, a2
+; RV64IMZBS-NEXT: ld a0, 680(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 664(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, a0, s0
; RV64IMZBS-NEXT: ld a0, 608(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 600(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, a0, a1
-; RV64IMZBS-NEXT: ld a0, 584(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 568(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, a0, a1
+; RV64IMZBS-NEXT: ld s1, 600(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, a0, s1
; RV64IMZBS-NEXT: ld a0, 552(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 544(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, a0, s0
-; RV64IMZBS-NEXT: ld a0, 536(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a5, 528(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a0, a5
-; RV64IMZBS-NEXT: ld a0, 512(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 504(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a0, a1
-; RV64IMZBS-NEXT: ld a0, 488(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 480(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a0, a1
-; RV64IMZBS-NEXT: ld a0, 472(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 464(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, a0, a1
-; RV64IMZBS-NEXT: ld a0, 456(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, a0, a1
-; RV64IMZBS-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, a2, a0
-; RV64IMZBS-NEXT: xor a0, a3, a4
-; RV64IMZBS-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a2, 72(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 544(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, a0, a2
+; RV64IMZBS-NEXT: ld a0, 496(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 488(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, a0, a2
+; RV64IMZBS-NEXT: ld a0, 464(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 456(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, a0, a2
+; RV64IMZBS-NEXT: ld a0, 448(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, a1, a0
+; RV64IMZBS-NEXT: ld a0, 432(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 376(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a1, a2
-; RV64IMZBS-NEXT: ld a2, 64(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a3, 56(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 360(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a3, 344(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 48(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a4, 40(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a3, 304(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 296(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a3, a4
-; RV64IMZBS-NEXT: ld a4, 32(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, ra
-; RV64IMZBS-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor ra, ra, s11
-; RV64IMZBS-NEXT: xor s9, s10, s9
-; RV64IMZBS-NEXT: xor s6, s7, s6
-; RV64IMZBS-NEXT: xor s4, s5, s4
-; RV64IMZBS-NEXT: xor s2, s3, s2
-; RV64IMZBS-NEXT: xor t3, s1, t3
-; RV64IMZBS-NEXT: ld s1, 720(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t4, s1
-; RV64IMZBS-NEXT: ld s1, 704(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, t5, s1
-; RV64IMZBS-NEXT: ld s1, 688(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, t6, s1
-; RV64IMZBS-NEXT: ld s1, 672(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s0, s1
-; RV64IMZBS-NEXT: ld s1, 656(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, s1
-; RV64IMZBS-NEXT: ld s1, 592(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, s1
-; RV64IMZBS-NEXT: ld s1, 560(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, s1
-; RV64IMZBS-NEXT: ld s1, 520(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, s1
-; RV64IMZBS-NEXT: ld s1, 496(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t1, s1
-; RV64IMZBS-NEXT: xor a0, t2, a0
-; RV64IMZBS-NEXT: ld t2, 168(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, a1, t2
-; RV64IMZBS-NEXT: ld t2, 160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, t2
-; RV64IMZBS-NEXT: ld t2, 144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, t2
-; RV64IMZBS-NEXT: ld t2, 120(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, t2
-; RV64IMZBS-NEXT: ld t2, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, ra, t2
-; RV64IMZBS-NEXT: ld s1, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s1, s9, s1
-; RV64IMZBS-NEXT: ld s3, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s3, s6, s3
-; RV64IMZBS-NEXT: ld s5, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s4, s4, s5
-; RV64IMZBS-NEXT: xor s2, s2, s8
-; RV64IMZBS-NEXT: xor t3, t3, t4
-; RV64IMZBS-NEXT: ld t4, 784(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t5, t4
-; RV64IMZBS-NEXT: ld t5, 760(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, t6, t5
-; RV64IMZBS-NEXT: ld t6, 752(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, s0, t6
-; RV64IMZBS-NEXT: ld s0, 728(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, s0
-; RV64IMZBS-NEXT: ld s0, 696(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, s0
-; RV64IMZBS-NEXT: ld s0, 664(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, s0
-; RV64IMZBS-NEXT: ld s0, 624(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, s0
-; RV64IMZBS-NEXT: ld s0, 576(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t1, s0
-; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, a1
-; RV64IMZBS-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, a1
-; RV64IMZBS-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, a1
-; RV64IMZBS-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, t2, a1
-; RV64IMZBS-NEXT: ld s0, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s1, s0
-; RV64IMZBS-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s1, s3, a1
-; RV64IMZBS-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s3, s4, a1
-; RV64IMZBS-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s2, s2, a1
-; RV64IMZBS-NEXT: ld a1, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, t3, a1
-; RV64IMZBS-NEXT: ld t3, 864(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t4, t3
-; RV64IMZBS-NEXT: ld t4, 840(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t5, t4
-; RV64IMZBS-NEXT: ld t5, 816(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, t6, t5
-; RV64IMZBS-NEXT: ld t6, 808(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t6
-; RV64IMZBS-NEXT: ld t6, 776(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t6
-; RV64IMZBS-NEXT: ld t6, 744(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t6
-; RV64IMZBS-NEXT: ld t6, 712(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, t6
-; RV64IMZBS-NEXT: ld t6, 680(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t1, t6
-; RV64IMZBS-NEXT: ld t6, 432(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a0, a0, t6
-; RV64IMZBS-NEXT: ld t6, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, t6
-; RV64IMZBS-NEXT: ld t6, 392(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, t6
-; RV64IMZBS-NEXT: ld t6, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, t6
-; RV64IMZBS-NEXT: ld t6, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, t2, t6
-; RV64IMZBS-NEXT: ld t6, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, s0, t6
-; RV64IMZBS-NEXT: ld s0, 416(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s1, s0
-; RV64IMZBS-NEXT: ld s1, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s1, s3, s1
-; RV64IMZBS-NEXT: ld s3, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s2, s2, s3
-; RV64IMZBS-NEXT: ld s3, 912(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 248(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a5, 232(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, a5
+; RV64IMZBS-NEXT: ld a5, 192(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a6, 168(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, a6
+; RV64IMZBS-NEXT: ld a6, 136(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a7, 112(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a6, a7
+; RV64IMZBS-NEXT: ld a7, 56(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld ra, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, a7, ra
+; RV64IMZBS-NEXT: xor s9, s11, s9
+; RV64IMZBS-NEXT: xor s4, s6, s4
+; RV64IMZBS-NEXT: xor s3, s3, s5
+; RV64IMZBS-NEXT: ld s5, 888(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t2, t2, s5
+; RV64IMZBS-NEXT: ld s5, 840(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t4, s5
+; RV64IMZBS-NEXT: ld s5, 816(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, t5, s5
+; RV64IMZBS-NEXT: ld s5, 768(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, t6, s5
+; RV64IMZBS-NEXT: ld s5, 712(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s0, s5
+; RV64IMZBS-NEXT: ld s5, 648(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s1, s5
+; RV64IMZBS-NEXT: ld s5, 568(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, s2, s5
+; RV64IMZBS-NEXT: ld s5, 520(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, s5
+; RV64IMZBS-NEXT: ld s5, 472(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s5
+; RV64IMZBS-NEXT: xor a0, t3, a0
+; RV64IMZBS-NEXT: ld t3, 440(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a1, t3
+; RV64IMZBS-NEXT: ld t3, 392(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a2, t3
+; RV64IMZBS-NEXT: ld t3, 328(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a3, t3
+; RV64IMZBS-NEXT: ld t3, 280(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, t3
+; RV64IMZBS-NEXT: ld t3, 224(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, t3
+; RV64IMZBS-NEXT: ld t3, 152(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a6, t3
+; RV64IMZBS-NEXT: ld t3, 72(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, a7, t3
+; RV64IMZBS-NEXT: ld t3, 24(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, s9, t3
+; RV64IMZBS-NEXT: xor s4, s4, s7
+; RV64IMZBS-NEXT: xor t2, s3, t2
+; RV64IMZBS-NEXT: ld s3, 904(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t4, t4, s3
-; RV64IMZBS-NEXT: ld s3, 872(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, s3
-; RV64IMZBS-NEXT: ld s3, 832(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, s3
+; RV64IMZBS-NEXT: ld s3, 856(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, t5, s3
; RV64IMZBS-NEXT: ld s3, 800(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, s3
-; RV64IMZBS-NEXT: ld s3, 768(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, s3
-; RV64IMZBS-NEXT: ld s3, 736(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t1, s3
-; RV64IMZBS-NEXT: ld s3, 440(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, s3
-; RV64IMZBS-NEXT: ld s3, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, t2, s3
-; RV64IMZBS-NEXT: ld s3, 1032(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t6, t6, s3
-; RV64IMZBS-NEXT: ld s3, 1016(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 752(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor s0, s0, s3
-; RV64IMZBS-NEXT: ld s3, 424(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 672(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor s1, s1, s3
-; RV64IMZBS-NEXT: ld s3, 408(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 592(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor s2, s2, s3
-; RV64IMZBS-NEXT: xor t3, a1, t3
-; RV64IMZBS-NEXT: xor t3, t3, t4
-; RV64IMZBS-NEXT: ld t4, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t4
+; RV64IMZBS-NEXT: ld s3, 536(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, s3
+; RV64IMZBS-NEXT: ld s3, 480(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s3
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 368(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 320(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 256(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 184(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: ld a6, 96(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: ld a7, 40(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t3, a7
+; RV64IMZBS-NEXT: xor t3, s4, s8
+; RV64IMZBS-NEXT: xor t2, t2, t4
; RV64IMZBS-NEXT: ld t4, 896(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t4
-; RV64IMZBS-NEXT: ld t4, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t4
-; RV64IMZBS-NEXT: ld t4, 824(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, t4
-; RV64IMZBS-NEXT: ld t4, 792(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t1, t4
-; RV64IMZBS-NEXT: xor a2, a0, a2
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 1000(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, t2, a3
-; RV64IMZBS-NEXT: ld t2, 1064(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, t6, t2
-; RV64IMZBS-NEXT: ld t4, 1048(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, s0, t4
-; RV64IMZBS-NEXT: ld t6, 1024(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t5, t4
+; RV64IMZBS-NEXT: ld t5, 832(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, t6, t5
+; RV64IMZBS-NEXT: ld t6, 792(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, s0, t6
+; RV64IMZBS-NEXT: ld s0, 704(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s1, s0
+; RV64IMZBS-NEXT: ld s1, 640(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s2, s1
+; RV64IMZBS-NEXT: ld s2, 560(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, s2
+; RV64IMZBS-NEXT: ld s2, 504(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s2
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 352(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 288(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 216(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 144(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: ld a6, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: xor a7, t3, s10
+; RV64IMZBS-NEXT: xor t2, t2, t4
+; RV64IMZBS-NEXT: ld t3, 872(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t5, t3
+; RV64IMZBS-NEXT: ld t4, 808(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t6, t4
+; RV64IMZBS-NEXT: ld t5, 744(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, s0, t5
+; RV64IMZBS-NEXT: ld t6, 656(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t6, s1, t6
-; RV64IMZBS-NEXT: ld s0, 1008(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s2, s0
-; RV64IMZBS-NEXT: xor t3, t3, t5
-; RV64IMZBS-NEXT: ld t5, 936(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t5
-; RV64IMZBS-NEXT: ld t5, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t5
-; RV64IMZBS-NEXT: ld t5, 904(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t5
-; RV64IMZBS-NEXT: ld t5, 880(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 576(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, s0
+; RV64IMZBS-NEXT: ld s0, 512(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s0
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 312(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 240(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 160(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 80(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: ld a6, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: xor a7, t2, t3
+; RV64IMZBS-NEXT: ld t2, 824(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t2, t4, t2
+; RV64IMZBS-NEXT: ld t3, 760(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t5, t3
+; RV64IMZBS-NEXT: ld t4, 688(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t6, t4
+; RV64IMZBS-NEXT: ld t5, 584(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t0, t0, t5
-; RV64IMZBS-NEXT: ld t5, 848(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t5, 528(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t1, t1, t5
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: ld a4, 1088(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, a4
-; RV64IMZBS-NEXT: ld a4, 1080(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, t2, a4
-; RV64IMZBS-NEXT: ld t2, 1072(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 88(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 32(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: xor a6, a7, t2
+; RV64IMZBS-NEXT: ld a7, 784(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t3, a7
+; RV64IMZBS-NEXT: ld t2, 696(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t2, t4, t2
-; RV64IMZBS-NEXT: ld t4, 1056(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t6, t4
-; RV64IMZBS-NEXT: ld t5, 1040(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, s0, t5
-; RV64IMZBS-NEXT: xor a5, t3, a5
+; RV64IMZBS-NEXT: ld t3, 616(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, t3
+; RV64IMZBS-NEXT: ld t3, 912(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, t3
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 200(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 104(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 920(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: xor a5, a6, a7
+; RV64IMZBS-NEXT: ld a6, 720(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, t2, a6
+; RV64IMZBS-NEXT: ld a7, 624(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t0, a7
+; RV64IMZBS-NEXT: xor a1, a0, a1
+; RV64IMZBS-NEXT: ld a0, 208(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a2, a0
+; RV64IMZBS-NEXT: ld a0, 120(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a3, a0
; RV64IMZBS-NEXT: xor a5, a5, a6
-; RV64IMZBS-NEXT: xor a3, a2, a3
-; RV64IMZBS-NEXT: xor a3, a3, a4
-; RV64IMZBS-NEXT: slli a1, a1, 56
-; RV64IMZBS-NEXT: slli a0, a0, 56
-; RV64IMZBS-NEXT: ld t6, 960(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, t3, t6
-; RV64IMZBS-NEXT: and a2, a2, t6
-; RV64IMZBS-NEXT: slli a4, a4, 40
-; RV64IMZBS-NEXT: slli a2, a2, 40
-; RV64IMZBS-NEXT: or a1, a1, a4
-; RV64IMZBS-NEXT: srli a4, a5, 8
-; RV64IMZBS-NEXT: or a0, a0, a2
-; RV64IMZBS-NEXT: srli a2, a3, 8
-; RV64IMZBS-NEXT: ld a6, 976(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a4, a6
-; RV64IMZBS-NEXT: and a2, a2, a6
-; RV64IMZBS-NEXT: xor a6, a5, a7
+; RV64IMZBS-NEXT: lui a0, %hi(.LCPI6_0)
+; RV64IMZBS-NEXT: ld a0, %lo(.LCPI6_0)(a0)
+; RV64IMZBS-NEXT: ld a6, 632(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: xor a1, a1, a2
+; RV64IMZBS-NEXT: ld a2, 128(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: xor a3, a5, a6
+; RV64IMZBS-NEXT: xor a1, a1, a2
+; RV64IMZBS-NEXT: xor a2, a3, t1
+; RV64IMZBS-NEXT: xor a1, a1, a4
+; RV64IMZBS-NEXT: srli a3, a2, 40
+; RV64IMZBS-NEXT: srli a4, a2, 56
+; RV64IMZBS-NEXT: srli a5, a2, 24
+; RV64IMZBS-NEXT: srli a6, a2, 8
+; RV64IMZBS-NEXT: srliw a7, a2, 24
; RV64IMZBS-NEXT: lui t3, 4080
+; RV64IMZBS-NEXT: and t0, a2, t3
+; RV64IMZBS-NEXT: srli t1, a1, 8
+; RV64IMZBS-NEXT: ld t2, 960(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: and a6, a6, t2
+; RV64IMZBS-NEXT: and t1, t1, t2
+; RV64IMZBS-NEXT: slli t2, a2, 56
+; RV64IMZBS-NEXT: ld t4, 944(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: and a2, a2, t4
+; RV64IMZBS-NEXT: and a3, a3, t4
+; RV64IMZBS-NEXT: or a3, a3, a4
+; RV64IMZBS-NEXT: srli a4, a1, 40
; RV64IMZBS-NEXT: and a5, a5, t3
-; RV64IMZBS-NEXT: xor a7, a3, t2
-; RV64IMZBS-NEXT: and a3, a3, t3
-; RV64IMZBS-NEXT: xor t0, a6, t0
-; RV64IMZBS-NEXT: srli a6, a6, 24
-; RV64IMZBS-NEXT: xor t2, a7, t4
-; RV64IMZBS-NEXT: srli a7, a7, 24
-; RV64IMZBS-NEXT: and a6, a6, t3
-; RV64IMZBS-NEXT: and a7, a7, t3
-; RV64IMZBS-NEXT: or a4, a4, a6
-; RV64IMZBS-NEXT: srli a6, t0, 40
-; RV64IMZBS-NEXT: or a2, a2, a7
-; RV64IMZBS-NEXT: srli a7, t2, 40
-; RV64IMZBS-NEXT: and a6, a6, t6
-; RV64IMZBS-NEXT: and a7, a7, t6
-; RV64IMZBS-NEXT: slli a5, a5, 24
-; RV64IMZBS-NEXT: xor t1, t0, t1
-; RV64IMZBS-NEXT: srliw t0, t0, 24
-; RV64IMZBS-NEXT: slli t0, t0, 32
-; RV64IMZBS-NEXT: or a5, a5, t0
-; RV64IMZBS-NEXT: srli t0, t1, 56
-; RV64IMZBS-NEXT: or a6, a6, t0
-; RV64IMZBS-NEXT: slli a3, a3, 24
-; RV64IMZBS-NEXT: xor t0, t2, t5
-; RV64IMZBS-NEXT: srliw t1, t2, 24
-; RV64IMZBS-NEXT: slli t1, t1, 32
-; RV64IMZBS-NEXT: or a3, a3, t1
-; RV64IMZBS-NEXT: srli t0, t0, 56
-; RV64IMZBS-NEXT: or a7, a7, t0
-; RV64IMZBS-NEXT: or a1, a1, a5
+; RV64IMZBS-NEXT: or a5, a6, a5
+; RV64IMZBS-NEXT: srli a6, a1, 56
+; RV64IMZBS-NEXT: slli a7, a7, 32
+; RV64IMZBS-NEXT: slli t0, t0, 24
+; RV64IMZBS-NEXT: or a7, t0, a7
+; RV64IMZBS-NEXT: srli t0, a1, 24
+; RV64IMZBS-NEXT: slli a2, a2, 40
+; RV64IMZBS-NEXT: or a2, t2, a2
+; RV64IMZBS-NEXT: srliw t2, a1, 24
+; RV64IMZBS-NEXT: and t0, t0, t3
+; RV64IMZBS-NEXT: and t3, a1, t3
+; RV64IMZBS-NEXT: and a4, a4, t4
+; RV64IMZBS-NEXT: and t4, a1, t4
+; RV64IMZBS-NEXT: slli a1, a1, 56
+; RV64IMZBS-NEXT: slli t2, t2, 32
+; RV64IMZBS-NEXT: slli t3, t3, 24
+; RV64IMZBS-NEXT: slli t4, t4, 40
; RV64IMZBS-NEXT: or a4, a4, a6
-; RV64IMZBS-NEXT: or a0, a0, a3
+; RV64IMZBS-NEXT: or a6, t1, t0
+; RV64IMZBS-NEXT: or t0, t3, t2
+; RV64IMZBS-NEXT: or a1, a1, t4
+; RV64IMZBS-NEXT: or a3, a5, a3
; RV64IMZBS-NEXT: or a2, a2, a7
+; RV64IMZBS-NEXT: or a4, a6, a4
+; RV64IMZBS-NEXT: or a1, a1, t0
+; RV64IMZBS-NEXT: or a2, a2, a3
; RV64IMZBS-NEXT: or a1, a1, a4
-; RV64IMZBS-NEXT: or a0, a0, a2
-; RV64IMZBS-NEXT: srli a2, a1, 4
-; RV64IMZBS-NEXT: ld a4, 968(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, a1, a4
-; RV64IMZBS-NEXT: srli a3, a0, 4
-; RV64IMZBS-NEXT: and a0, a0, a4
-; RV64IMZBS-NEXT: and a2, a2, a4
-; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: srli a3, a2, 4
+; RV64IMZBS-NEXT: ld a5, 952(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: and a2, a2, a5
+; RV64IMZBS-NEXT: srli a4, a1, 4
+; RV64IMZBS-NEXT: and a1, a1, a5
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: slli a2, a2, 4
+; RV64IMZBS-NEXT: and a4, a4, a5
; RV64IMZBS-NEXT: slli a1, a1, 4
-; RV64IMZBS-NEXT: slli a0, a0, 4
-; RV64IMZBS-NEXT: or a1, a2, a1
-; RV64IMZBS-NEXT: or a0, a3, a0
-; RV64IMZBS-NEXT: srli a2, a1, 2
-; RV64IMZBS-NEXT: ld a4, 952(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, a1, a4
-; RV64IMZBS-NEXT: srli a3, a0, 2
-; RV64IMZBS-NEXT: and a0, a0, a4
-; RV64IMZBS-NEXT: and a2, a2, a4
-; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: or a2, a3, a2
+; RV64IMZBS-NEXT: or a1, a4, a1
+; RV64IMZBS-NEXT: srli a3, a2, 2
+; RV64IMZBS-NEXT: ld a5, 936(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: and a2, a2, a5
+; RV64IMZBS-NEXT: srli a4, a1, 2
+; RV64IMZBS-NEXT: and a1, a1, a5
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: slli a2, a2, 2
+; RV64IMZBS-NEXT: and a4, a4, a5
; RV64IMZBS-NEXT: slli a1, a1, 2
-; RV64IMZBS-NEXT: or a1, a2, a1
-; RV64IMZBS-NEXT: lui a2, %hi(.LCPI6_0)
-; RV64IMZBS-NEXT: ld a2, %lo(.LCPI6_0)(a2)
-; RV64IMZBS-NEXT: slli a0, a0, 2
-; RV64IMZBS-NEXT: or a0, a3, a0
-; RV64IMZBS-NEXT: srli a3, a1, 1
-; RV64IMZBS-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, a1, a4
-; RV64IMZBS-NEXT: and a4, a0, a4
-; RV64IMZBS-NEXT: srli a0, a0, 1
-; RV64IMZBS-NEXT: and a3, a3, a2
-; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: or a2, a3, a2
+; RV64IMZBS-NEXT: or a1, a4, a1
+; RV64IMZBS-NEXT: srli a3, a2, 1
+; RV64IMZBS-NEXT: ld a5, 928(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: and a2, a2, a5
+; RV64IMZBS-NEXT: srli a4, a1, 1
+; RV64IMZBS-NEXT: and a1, a1, a5
+; RV64IMZBS-NEXT: and a3, a3, a0
+; RV64IMZBS-NEXT: slli a2, a2, 1
+; RV64IMZBS-NEXT: and a0, a4, a0
; RV64IMZBS-NEXT: slli a1, a1, 1
-; RV64IMZBS-NEXT: or a1, a3, a1
-; RV64IMZBS-NEXT: slli a4, a4, 1
-; RV64IMZBS-NEXT: or a0, a0, a4
-; RV64IMZBS-NEXT: srli a1, a1, 1
+; RV64IMZBS-NEXT: or a2, a3, a2
+; RV64IMZBS-NEXT: or a0, a0, a1
+; RV64IMZBS-NEXT: srli a2, a2, 1
; RV64IMZBS-NEXT: srli a0, a0, 1
-; RV64IMZBS-NEXT: ld a2, 984(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: sd a1, 0(a2)
-; RV64IMZBS-NEXT: sd a0, 8(a2)
-; RV64IMZBS-NEXT: ld a2, 992(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: sd a1, 0(a2)
-; RV64IMZBS-NEXT: sd a0, 8(a2)
-; RV64IMZBS-NEXT: ld ra, 1192(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 1184(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 1176(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s2, 1168(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s3, 1160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s4, 1152(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s5, 1144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s6, 1136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s7, 1128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s8, 1120(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s9, 1112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s10, 1104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 1096(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: addi sp, sp, 1200
+; RV64IMZBS-NEXT: ld a1, 968(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: sd a2, 0(a1)
+; RV64IMZBS-NEXT: sd a0, 8(a1)
+; RV64IMZBS-NEXT: ld a1, 976(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: sd a2, 0(a1)
+; RV64IMZBS-NEXT: sd a0, 8(a1)
+; RV64IMZBS-NEXT: ld ra, 1080(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 1072(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 1064(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 1056(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 1048(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s4, 1040(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s5, 1032(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s6, 1024(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s7, 1016(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s8, 1008(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s9, 1000(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s10, 992(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s11, 984(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 1088
; RV64IMZBS-NEXT: ret
%x.ext = zext <2 x i64> %x to <2 x i128>
%y.ext = zext <2 x i64> %y to <2 x i128>
@@ -13831,5 +12858,6 @@ define void @commutative_clmulh_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
; CHECK-I: {{.*}}
+; CHECK-M: {{.*}}
+; CHECK-ZBS: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/clmulr.ll b/llvm/test/CodeGen/RISCV/clmulr.ll
index 03fa2ba93d8d7..62a7bae169c07 100644
--- a/llvm/test/CodeGen/RISCV/clmulr.ll
+++ b/llvm/test/CodeGen/RISCV/clmulr.ll
@@ -7,478 +7,33 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+zbs -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZBS,RV64IMZBS
define i4 @clmulr_i4(i4 %a, i4 %b) nounwind {
-; RV32I-LABEL: clmulr_i4:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: andi s0, a0, 15
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s2, s3, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: srli a0, a0, 3
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: clmulr_i4:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: andi s0, a0, 15
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: srli a0, a0, 3
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: ret
-;
-; CHECK-M-LABEL: clmulr_i4:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: andi a0, a0, 15
-; CHECK-M-NEXT: andi a2, a1, 2
-; CHECK-M-NEXT: andi a3, a1, 1
-; CHECK-M-NEXT: andi a4, a1, 4
-; CHECK-M-NEXT: andi a1, a1, 8
-; CHECK-M-NEXT: mul a2, a0, a2
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: mul a0, a0, a1
-; CHECK-M-NEXT: xor a2, a3, a2
-; CHECK-M-NEXT: xor a0, a4, a0
-; CHECK-M-NEXT: xor a0, a2, a0
-; CHECK-M-NEXT: srli a0, a0, 3
-; CHECK-M-NEXT: ret
-;
-; CHECK-ZBS-LABEL: clmulr_i4:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: andi a0, a0, 15
-; CHECK-ZBS-NEXT: andi a2, a1, 2
-; CHECK-ZBS-NEXT: andi a3, a1, 1
-; CHECK-ZBS-NEXT: andi a4, a1, 4
-; CHECK-ZBS-NEXT: andi a1, a1, 8
-; CHECK-ZBS-NEXT: mul a2, a0, a2
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: mul a0, a0, a1
-; CHECK-ZBS-NEXT: xor a2, a3, a2
-; CHECK-ZBS-NEXT: xor a0, a4, a0
-; CHECK-ZBS-NEXT: xor a0, a2, a0
-; CHECK-ZBS-NEXT: srli a0, a0, 3
-; CHECK-ZBS-NEXT: ret
+; CHECK-LABEL: clmulr_i4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a0, a0, 15
+; CHECK-NEXT: andi a2, a1, 2
+; CHECK-NEXT: andi a3, a1, 4
+; CHECK-NEXT: slli a4, a0, 1
+; CHECK-NEXT: seqz a2, a2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a2, a2, a4
+; CHECK-NEXT: slli a4, a0, 2
+; CHECK-NEXT: seqz a3, a3
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: andi a4, a1, 1
+; CHECK-NEXT: andi a1, a1, 8
+; CHECK-NEXT: seqz a4, a4
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a4, a4, a0
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: xor a2, a4, a2
+; CHECK-NEXT: xor a0, a3, a0
+; CHECK-NEXT: xor a0, a2, a0
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: ret
%a.ext = zext i4 %a to i8
%b.ext = zext i4 %b to i8
%clmul = call i8 @llvm.clmul.i8(i8 %a.ext, i8 %b.ext)
@@ -488,478 +43,33 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind {
}
define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind {
-; RV32I-LABEL: clmulr_i4_bitreverse:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: andi s0, a0, 15
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s2, s3, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: srli a0, a0, 3
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: clmulr_i4_bitreverse:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: andi s0, a0, 15
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: srli a0, a0, 3
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: ret
-;
-; CHECK-M-LABEL: clmulr_i4_bitreverse:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: andi a0, a0, 15
-; CHECK-M-NEXT: andi a2, a1, 2
-; CHECK-M-NEXT: andi a3, a1, 1
-; CHECK-M-NEXT: andi a4, a1, 4
-; CHECK-M-NEXT: andi a1, a1, 8
-; CHECK-M-NEXT: mul a2, a0, a2
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: mul a0, a0, a1
-; CHECK-M-NEXT: xor a2, a3, a2
-; CHECK-M-NEXT: xor a0, a4, a0
-; CHECK-M-NEXT: xor a0, a2, a0
-; CHECK-M-NEXT: srli a0, a0, 3
-; CHECK-M-NEXT: ret
-;
-; CHECK-ZBS-LABEL: clmulr_i4_bitreverse:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: andi a0, a0, 15
-; CHECK-ZBS-NEXT: andi a2, a1, 2
-; CHECK-ZBS-NEXT: andi a3, a1, 1
-; CHECK-ZBS-NEXT: andi a4, a1, 4
-; CHECK-ZBS-NEXT: andi a1, a1, 8
-; CHECK-ZBS-NEXT: mul a2, a0, a2
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: mul a0, a0, a1
-; CHECK-ZBS-NEXT: xor a2, a3, a2
-; CHECK-ZBS-NEXT: xor a0, a4, a0
-; CHECK-ZBS-NEXT: xor a0, a2, a0
-; CHECK-ZBS-NEXT: srli a0, a0, 3
-; CHECK-ZBS-NEXT: ret
+; CHECK-LABEL: clmulr_i4_bitreverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a0, a0, 15
+; CHECK-NEXT: andi a2, a1, 2
+; CHECK-NEXT: andi a3, a1, 4
+; CHECK-NEXT: slli a4, a0, 1
+; CHECK-NEXT: seqz a2, a2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a2, a2, a4
+; CHECK-NEXT: slli a4, a0, 2
+; CHECK-NEXT: seqz a3, a3
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: andi a4, a1, 1
+; CHECK-NEXT: andi a1, a1, 8
+; CHECK-NEXT: seqz a4, a4
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a4, a4, a0
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: xor a2, a4, a2
+; CHECK-NEXT: xor a0, a3, a0
+; CHECK-NEXT: xor a0, a2, a0
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: ret
%a.rev = call i4 @llvm.bitreverse.i4(i4 %a)
%b.rev = call i4 @llvm.bitreverse.i4(i4 %b)
%res.rev = call i4 @llvm.clmul.i4(i4 %a.rev, i4 %b.rev)
@@ -968,502 +78,57 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind {
}
define i8 @clmulr_i8(i8 %a, i8 %b) nounwind {
-; RV32I-LABEL: clmulr_i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: zext.b s0, a0
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s2, s3, a0
-; RV32I-NEXT: andi a1, s1, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: srli a0, a0, 7
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: clmulr_i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: zext.b s0, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 16
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 32
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 64
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: andi a1, s1, 128
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: srli a0, a0, 7
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: ret
-;
-; CHECK-M-LABEL: clmulr_i8:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: zext.b a0, a0
-; CHECK-M-NEXT: andi a2, a1, 2
-; CHECK-M-NEXT: andi a3, a1, 1
-; CHECK-M-NEXT: andi a4, a1, 4
-; CHECK-M-NEXT: andi a5, a1, 8
-; CHECK-M-NEXT: mul a2, a0, a2
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: xor a2, a3, a2
-; CHECK-M-NEXT: andi a3, a1, 16
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: xor a4, a4, a5
-; CHECK-M-NEXT: andi a5, a1, 32
-; CHECK-M-NEXT: mul a3, a0, a3
-; CHECK-M-NEXT: mul a5, a0, a5
-; CHECK-M-NEXT: xor a3, a3, a5
-; CHECK-M-NEXT: xor a2, a2, a4
-; CHECK-M-NEXT: andi a4, a1, 64
-; CHECK-M-NEXT: andi a1, a1, 128
-; CHECK-M-NEXT: mul a4, a0, a4
-; CHECK-M-NEXT: xor a3, a3, a4
-; CHECK-M-NEXT: xor a2, a2, a3
-; CHECK-M-NEXT: mul a0, a0, a1
-; CHECK-M-NEXT: xor a0, a2, a0
-; CHECK-M-NEXT: srli a0, a0, 7
-; CHECK-M-NEXT: ret
-;
-; CHECK-ZBS-LABEL: clmulr_i8:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: zext.b a0, a0
-; CHECK-ZBS-NEXT: andi a2, a1, 2
-; CHECK-ZBS-NEXT: andi a3, a1, 1
-; CHECK-ZBS-NEXT: andi a4, a1, 4
-; CHECK-ZBS-NEXT: andi a5, a1, 8
-; CHECK-ZBS-NEXT: mul a2, a0, a2
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: xor a2, a3, a2
-; CHECK-ZBS-NEXT: andi a3, a1, 16
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: mul a5, a0, a5
-; CHECK-ZBS-NEXT: xor a4, a4, a5
-; CHECK-ZBS-NEXT: andi a5, a1, 32
-; CHECK-ZBS-NEXT: mul a3, a0, a3
-; CHECK-ZBS-NEXT: mul a5, a0, a5
-; CHECK-ZBS-NEXT: xor a3, a3, a5
-; CHECK-ZBS-NEXT: xor a2, a2, a4
-; CHECK-ZBS-NEXT: andi a4, a1, 64
-; CHECK-ZBS-NEXT: andi a1, a1, 128
-; CHECK-ZBS-NEXT: mul a4, a0, a4
-; CHECK-ZBS-NEXT: xor a3, a3, a4
-; CHECK-ZBS-NEXT: xor a2, a2, a3
-; CHECK-ZBS-NEXT: mul a0, a0, a1
-; CHECK-ZBS-NEXT: xor a0, a2, a0
-; CHECK-ZBS-NEXT: srli a0, a0, 7
-; CHECK-ZBS-NEXT: ret
+; CHECK-LABEL: clmulr_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: zext.b a0, a0
+; CHECK-NEXT: andi a2, a1, 2
+; CHECK-NEXT: andi a3, a1, 4
+; CHECK-NEXT: andi a4, a1, 8
+; CHECK-NEXT: andi a5, a1, 16
+; CHECK-NEXT: andi a6, a1, 32
+; CHECK-NEXT: andi a7, a1, 64
+; CHECK-NEXT: slli t0, a0, 1
+; CHECK-NEXT: seqz a2, a2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a2, a2, t0
+; CHECK-NEXT: slli t0, a0, 2
+; CHECK-NEXT: seqz a3, a3
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, t0
+; CHECK-NEXT: slli t0, a0, 3
+; CHECK-NEXT: seqz a4, a4
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a4, a4, t0
+; CHECK-NEXT: slli t0, a0, 4
+; CHECK-NEXT: seqz a5, a5
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a5, a5, t0
+; CHECK-NEXT: slli t0, a0, 5
+; CHECK-NEXT: seqz a6, a6
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a6, a6, t0
+; CHECK-NEXT: slli t0, a0, 6
+; CHECK-NEXT: seqz a7, a7
+; CHECK-NEXT: addi a7, a7, -1
+; CHECK-NEXT: and a7, a7, t0
+; CHECK-NEXT: andi t0, a1, 1
+; CHECK-NEXT: seqz t0, t0
+; CHECK-NEXT: addi t0, t0, -1
+; CHECK-NEXT: and t0, t0, a0
+; CHECK-NEXT: xor a2, t0, a2
+; CHECK-NEXT: xor a3, a3, a4
+; CHECK-NEXT: xor a4, a5, a6
+; CHECK-NEXT: andi a1, a1, 128
+; CHECK-NEXT: slli a0, a0, 7
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: xor a3, a4, a7
+; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: xor a0, a2, a0
+; CHECK-NEXT: srli a0, a0, 7
+; CHECK-NEXT: ret
%a.ext = zext i8 %a to i16
%b.ext = zext i8 %b to i16
%clmul = call i16 @llvm.clmul.i16(i16 %a.ext, i16 %b.ext)
@@ -1475,575 +140,424 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind {
define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: clmulr_i16:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s0, a0, 16
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, a0, s2
-; RV32I-NEXT: andi a1, s1, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: andi a1, s1, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, s1, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: andi a1, s1, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a1, 1
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 2
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: lui a1, 4
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: lui a1, 8
-; RV32I-NEXT: xor s2, s3, a0
-; RV32I-NEXT: and a1, s1, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s2, a0
+; RV32I-NEXT: andi a2, a1, 2
+; RV32I-NEXT: andi a3, a1, 4
+; RV32I-NEXT: andi a4, a1, 8
+; RV32I-NEXT: andi a6, a1, 16
+; RV32I-NEXT: andi a7, a1, 32
+; RV32I-NEXT: andi t1, a1, 64
+; RV32I-NEXT: andi t3, a1, 128
+; RV32I-NEXT: andi t4, a1, 256
+; RV32I-NEXT: andi t5, a1, 512
+; RV32I-NEXT: andi t6, a1, 1024
+; RV32I-NEXT: li a5, 1
+; RV32I-NEXT: lui t0, 1
+; RV32I-NEXT: lui t2, 2
+; RV32I-NEXT: lui s0, 4
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: seqz a6, a6
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: seqz t1, t1
+; RV32I-NEXT: seqz t3, t3
+; RV32I-NEXT: seqz t4, t4
+; RV32I-NEXT: seqz t5, t5
+; RV32I-NEXT: seqz t6, t6
+; RV32I-NEXT: slli s1, a0, 1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a2, a2, s1
+; RV32I-NEXT: slli s1, a0, 2
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: and a3, a3, s1
+; RV32I-NEXT: slli s1, a0, 3
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and a4, a4, s1
+; RV32I-NEXT: slli s1, a0, 4
+; RV32I-NEXT: addi a6, a6, -1
+; RV32I-NEXT: and a6, a6, s1
+; RV32I-NEXT: slli s1, a0, 5
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: and a7, a7, s1
+; RV32I-NEXT: slli s1, a0, 6
+; RV32I-NEXT: addi t1, t1, -1
+; RV32I-NEXT: and t1, t1, s1
+; RV32I-NEXT: slli s1, a0, 7
+; RV32I-NEXT: addi t3, t3, -1
+; RV32I-NEXT: and t3, t3, s1
+; RV32I-NEXT: slli s1, a0, 8
+; RV32I-NEXT: addi t4, t4, -1
+; RV32I-NEXT: and t4, t4, s1
+; RV32I-NEXT: slli s1, a0, 9
+; RV32I-NEXT: addi t5, t5, -1
+; RV32I-NEXT: and t5, t5, s1
+; RV32I-NEXT: slli s1, a0, 10
+; RV32I-NEXT: addi t6, t6, -1
+; RV32I-NEXT: and t6, t6, s1
+; RV32I-NEXT: lui s1, 8
+; RV32I-NEXT: slli a5, a5, 11
+; RV32I-NEXT: and t0, a1, t0
+; RV32I-NEXT: and t2, a1, t2
+; RV32I-NEXT: and s0, a1, s0
+; RV32I-NEXT: and s1, a1, s1
+; RV32I-NEXT: and a5, a1, a5
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a0
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: xor a2, a6, a7
+; RV32I-NEXT: seqz a4, t0
+; RV32I-NEXT: xor a6, t3, t4
+; RV32I-NEXT: slli a7, a0, 12
+; RV32I-NEXT: seqz t0, t2
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and a4, a4, a7
+; RV32I-NEXT: slli a7, a0, 13
+; RV32I-NEXT: seqz t2, s0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: and a7, t0, a7
+; RV32I-NEXT: slli t0, a0, 14
+; RV32I-NEXT: seqz t3, s1
+; RV32I-NEXT: addi t2, t2, -1
+; RV32I-NEXT: and t0, t2, t0
+; RV32I-NEXT: slli t2, a0, 15
+; RV32I-NEXT: addi t3, t3, -1
+; RV32I-NEXT: and t2, t3, t2
+; RV32I-NEXT: xor a1, a1, a3
+; RV32I-NEXT: xor a2, a2, t1
+; RV32I-NEXT: xor a3, a6, t5
+; RV32I-NEXT: slli a0, a0, 11
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: and a0, a5, a0
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor a2, a3, t6
+; RV32I-NEXT: xor a0, a0, a4
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor a0, a0, a7
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: xor a1, t0, t2
+; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a0, a0, 15
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: clmulr_i16:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 16
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 32
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 64
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 128
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 256
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 512
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 1024
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: li a1, 1
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: slli a1, a1, 11
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s2, a0
+; RV64I-NEXT: andi a2, a1, 2
+; RV64I-NEXT: andi a3, a1, 4
+; RV64I-NEXT: andi a4, a1, 8
+; RV64I-NEXT: andi a6, a1, 16
+; RV64I-NEXT: andi a7, a1, 32
+; RV64I-NEXT: andi t1, a1, 64
+; RV64I-NEXT: andi t3, a1, 128
+; RV64I-NEXT: andi t4, a1, 256
+; RV64I-NEXT: andi t5, a1, 512
+; RV64I-NEXT: andi t6, a1, 1024
+; RV64I-NEXT: li a5, 1
+; RV64I-NEXT: lui t0, 1
+; RV64I-NEXT: lui t2, 2
+; RV64I-NEXT: lui s0, 4
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: seqz a6, a6
+; RV64I-NEXT: seqz a7, a7
+; RV64I-NEXT: seqz t1, t1
+; RV64I-NEXT: seqz t3, t3
+; RV64I-NEXT: seqz t4, t4
+; RV64I-NEXT: seqz t5, t5
+; RV64I-NEXT: seqz t6, t6
+; RV64I-NEXT: slli s1, a0, 1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a2, a2, s1
+; RV64I-NEXT: slli s1, a0, 2
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a3, a3, s1
+; RV64I-NEXT: slli s1, a0, 3
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, s1
+; RV64I-NEXT: slli s1, a0, 4
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a6, a6, s1
+; RV64I-NEXT: slli s1, a0, 5
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: and a7, a7, s1
+; RV64I-NEXT: slli s1, a0, 6
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: and t1, t1, s1
+; RV64I-NEXT: slli s1, a0, 7
+; RV64I-NEXT: addi t3, t3, -1
+; RV64I-NEXT: and t3, t3, s1
+; RV64I-NEXT: slli s1, a0, 8
+; RV64I-NEXT: addi t4, t4, -1
+; RV64I-NEXT: and t4, t4, s1
+; RV64I-NEXT: slli s1, a0, 9
+; RV64I-NEXT: addi t5, t5, -1
+; RV64I-NEXT: and t5, t5, s1
+; RV64I-NEXT: slli s1, a0, 10
+; RV64I-NEXT: addi t6, t6, -1
+; RV64I-NEXT: and t6, t6, s1
+; RV64I-NEXT: lui s1, 8
+; RV64I-NEXT: slli a5, a5, 11
+; RV64I-NEXT: and t0, a1, t0
+; RV64I-NEXT: and t2, a1, t2
+; RV64I-NEXT: and s0, a1, s0
+; RV64I-NEXT: and s1, a1, s1
+; RV64I-NEXT: and a5, a1, a5
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: seqz a1, a1
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a1, a1, a0
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: xor a3, a3, a4
+; RV64I-NEXT: xor a2, a6, a7
+; RV64I-NEXT: seqz a4, t0
+; RV64I-NEXT: xor a6, t3, t4
+; RV64I-NEXT: slli a7, a0, 12
+; RV64I-NEXT: seqz t0, t2
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, a7
+; RV64I-NEXT: slli a7, a0, 13
+; RV64I-NEXT: seqz t2, s0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: and a7, t0, a7
+; RV64I-NEXT: slli t0, a0, 14
+; RV64I-NEXT: seqz t3, s1
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and t0, t2, t0
+; RV64I-NEXT: slli t2, a0, 15
+; RV64I-NEXT: addi t3, t3, -1
+; RV64I-NEXT: and t2, t3, t2
+; RV64I-NEXT: xor a1, a1, a3
+; RV64I-NEXT: xor a2, a2, t1
+; RV64I-NEXT: xor a3, a6, t5
+; RV64I-NEXT: slli a0, a0, 11
+; RV64I-NEXT: seqz a5, a5
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: and a0, a5, a0
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: xor a2, a3, t6
+; RV64I-NEXT: xor a0, a0, a4
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: xor a0, a0, a7
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: xor a1, t0, t2
+; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a0, a0, 15
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32IM-LABEL: clmulr_i16:
; RV32IM: # %bb.0:
+; RV32IM-NEXT: addi sp, sp, -16
+; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32IM-NEXT: slli a0, a0, 16
; RV32IM-NEXT: andi a2, a1, 2
-; RV32IM-NEXT: andi a3, a1, 1
-; RV32IM-NEXT: andi a4, a1, 4
-; RV32IM-NEXT: andi a5, a1, 8
-; RV32IM-NEXT: andi a6, a1, 16
+; RV32IM-NEXT: andi a3, a1, 4
+; RV32IM-NEXT: andi a4, a1, 8
+; RV32IM-NEXT: andi a5, a1, 16
; RV32IM-NEXT: andi a7, a1, 32
+; RV32IM-NEXT: andi t0, a1, 64
+; RV32IM-NEXT: andi t1, a1, 128
+; RV32IM-NEXT: andi t2, a1, 256
+; RV32IM-NEXT: andi t3, a1, 512
+; RV32IM-NEXT: andi t4, a1, 1024
+; RV32IM-NEXT: li a6, 1
+; RV32IM-NEXT: lui t5, 1
+; RV32IM-NEXT: lui t6, 2
+; RV32IM-NEXT: lui s0, 4
; RV32IM-NEXT: srli a0, a0, 16
-; RV32IM-NEXT: mul a2, a0, a2
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: andi a3, a1, 64
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: andi a5, a1, 128
-; RV32IM-NEXT: mul a6, a0, a6
-; RV32IM-NEXT: mul a7, a0, a7
-; RV32IM-NEXT: xor a6, a6, a7
-; RV32IM-NEXT: andi a7, a1, 256
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: mul a7, a0, a7
-; RV32IM-NEXT: xor a5, a5, a7
-; RV32IM-NEXT: andi a7, a1, 512
-; RV32IM-NEXT: xor a2, a2, a4
-; RV32IM-NEXT: li a4, 1
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: xor a3, a6, a3
-; RV32IM-NEXT: lui a6, 1
-; RV32IM-NEXT: mul a7, a0, a7
-; RV32IM-NEXT: xor a5, a5, a7
-; RV32IM-NEXT: lui a7, 2
-; RV32IM-NEXT: slli a4, a4, 11
-; RV32IM-NEXT: and a6, a1, a6
-; RV32IM-NEXT: and a4, a1, a4
-; RV32IM-NEXT: mul a6, a0, a6
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: xor a4, a4, a6
-; RV32IM-NEXT: lui a6, 4
-; RV32IM-NEXT: xor a2, a2, a3
-; RV32IM-NEXT: lui a3, 8
-; RV32IM-NEXT: and a7, a1, a7
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: seqz a4, a4
+; RV32IM-NEXT: seqz a5, a5
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: seqz t0, t0
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: seqz t2, t2
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: seqz t4, t4
+; RV32IM-NEXT: slli s1, a0, 1
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: and a2, a2, s1
+; RV32IM-NEXT: slli s1, a0, 2
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and a3, a3, s1
+; RV32IM-NEXT: slli s1, a0, 3
+; RV32IM-NEXT: addi a4, a4, -1
+; RV32IM-NEXT: and a4, a4, s1
+; RV32IM-NEXT: slli s1, a0, 4
+; RV32IM-NEXT: addi a5, a5, -1
+; RV32IM-NEXT: and a5, a5, s1
+; RV32IM-NEXT: slli s1, a0, 5
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: and a7, a7, s1
+; RV32IM-NEXT: slli s1, a0, 6
+; RV32IM-NEXT: addi t0, t0, -1
+; RV32IM-NEXT: and t0, t0, s1
+; RV32IM-NEXT: slli s1, a0, 7
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: and t1, t1, s1
+; RV32IM-NEXT: slli s1, a0, 8
+; RV32IM-NEXT: addi t2, t2, -1
+; RV32IM-NEXT: and t2, t2, s1
+; RV32IM-NEXT: slli s1, a0, 9
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: and t3, t3, s1
+; RV32IM-NEXT: slli s1, a0, 10
+; RV32IM-NEXT: addi t4, t4, -1
+; RV32IM-NEXT: and t4, t4, s1
+; RV32IM-NEXT: lui s1, 8
+; RV32IM-NEXT: slli a6, a6, 11
+; RV32IM-NEXT: and t5, a1, t5
+; RV32IM-NEXT: and t6, a1, t6
+; RV32IM-NEXT: and s0, a1, s0
+; RV32IM-NEXT: and s1, a1, s1
; RV32IM-NEXT: and a6, a1, a6
-; RV32IM-NEXT: and a3, a1, a3
-; RV32IM-NEXT: andi a1, a1, 1024
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: xor a1, a5, a1
-; RV32IM-NEXT: mul a5, a0, a7
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: mul a2, a0, a6
-; RV32IM-NEXT: xor a2, a4, a2
+; RV32IM-NEXT: andi a1, a1, 1
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: mul t5, a0, t5
+; RV32IM-NEXT: mul t6, a0, t6
+; RV32IM-NEXT: mul s0, a0, s0
+; RV32IM-NEXT: mul s1, a0, s1
+; RV32IM-NEXT: and a1, a1, a0
+; RV32IM-NEXT: mul a0, a0, a6
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: xor a2, a5, a7
+; RV32IM-NEXT: xor a4, t1, t2
+; RV32IM-NEXT: xor a0, a0, t5
+; RV32IM-NEXT: xor a1, a1, a3
+; RV32IM-NEXT: xor a2, a2, t0
+; RV32IM-NEXT: xor a3, a4, t3
+; RV32IM-NEXT: xor a0, a0, t6
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: xor a2, a3, t4
+; RV32IM-NEXT: xor a0, a0, s0
; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: mul a0, a0, a3
+; RV32IM-NEXT: xor a0, a0, s1
; RV32IM-NEXT: xor a0, a1, a0
; RV32IM-NEXT: srli a0, a0, 15
+; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 16
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: clmulr_i16:
; RV64IM: # %bb.0:
+; RV64IM-NEXT: addi sp, sp, -16
+; RV64IM-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 0(sp) # 8-byte Folded Spill
; RV64IM-NEXT: slli a0, a0, 48
; RV64IM-NEXT: andi a2, a1, 2
-; RV64IM-NEXT: andi a3, a1, 1
-; RV64IM-NEXT: andi a4, a1, 4
-; RV64IM-NEXT: andi a5, a1, 8
-; RV64IM-NEXT: andi a6, a1, 16
+; RV64IM-NEXT: andi a3, a1, 4
+; RV64IM-NEXT: andi a4, a1, 8
+; RV64IM-NEXT: andi a5, a1, 16
; RV64IM-NEXT: andi a7, a1, 32
+; RV64IM-NEXT: andi t0, a1, 64
+; RV64IM-NEXT: andi t1, a1, 128
+; RV64IM-NEXT: andi t2, a1, 256
+; RV64IM-NEXT: andi t3, a1, 512
+; RV64IM-NEXT: andi t4, a1, 1024
+; RV64IM-NEXT: li a6, 1
+; RV64IM-NEXT: lui t5, 1
+; RV64IM-NEXT: lui t6, 2
+; RV64IM-NEXT: lui s0, 4
; RV64IM-NEXT: srli a0, a0, 48
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: xor a2, a3, a2
-; RV64IM-NEXT: andi a3, a1, 64
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: xor a4, a4, a5
-; RV64IM-NEXT: andi a5, a1, 128
-; RV64IM-NEXT: mul a6, a0, a6
-; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: xor a6, a6, a7
-; RV64IM-NEXT: andi a7, a1, 256
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: xor a5, a5, a7
-; RV64IM-NEXT: andi a7, a1, 512
-; RV64IM-NEXT: xor a2, a2, a4
-; RV64IM-NEXT: li a4, 1
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: xor a3, a6, a3
-; RV64IM-NEXT: lui a6, 1
-; RV64IM-NEXT: mul a7, a0, a7
-; RV64IM-NEXT: xor a5, a5, a7
-; RV64IM-NEXT: lui a7, 2
-; RV64IM-NEXT: slli a4, a4, 11
-; RV64IM-NEXT: and a6, a1, a6
-; RV64IM-NEXT: and a4, a1, a4
-; RV64IM-NEXT: mul a6, a0, a6
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: xor a4, a4, a6
-; RV64IM-NEXT: lui a6, 4
-; RV64IM-NEXT: xor a2, a2, a3
-; RV64IM-NEXT: lui a3, 8
-; RV64IM-NEXT: and a7, a1, a7
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: seqz a3, a3
+; RV64IM-NEXT: seqz a4, a4
+; RV64IM-NEXT: seqz a5, a5
+; RV64IM-NEXT: seqz a7, a7
+; RV64IM-NEXT: seqz t0, t0
+; RV64IM-NEXT: seqz t1, t1
+; RV64IM-NEXT: seqz t2, t2
+; RV64IM-NEXT: seqz t3, t3
+; RV64IM-NEXT: seqz t4, t4
+; RV64IM-NEXT: slli s1, a0, 1
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: and a2, a2, s1
+; RV64IM-NEXT: slli s1, a0, 2
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and a3, a3, s1
+; RV64IM-NEXT: slli s1, a0, 3
+; RV64IM-NEXT: addi a4, a4, -1
+; RV64IM-NEXT: and a4, a4, s1
+; RV64IM-NEXT: slli s1, a0, 4
+; RV64IM-NEXT: addi a5, a5, -1
+; RV64IM-NEXT: and a5, a5, s1
+; RV64IM-NEXT: slli s1, a0, 5
+; RV64IM-NEXT: addi a7, a7, -1
+; RV64IM-NEXT: and a7, a7, s1
+; RV64IM-NEXT: slli s1, a0, 6
+; RV64IM-NEXT: addi t0, t0, -1
+; RV64IM-NEXT: and t0, t0, s1
+; RV64IM-NEXT: slli s1, a0, 7
+; RV64IM-NEXT: addi t1, t1, -1
+; RV64IM-NEXT: and t1, t1, s1
+; RV64IM-NEXT: slli s1, a0, 8
+; RV64IM-NEXT: addi t2, t2, -1
+; RV64IM-NEXT: and t2, t2, s1
+; RV64IM-NEXT: slli s1, a0, 9
+; RV64IM-NEXT: addi t3, t3, -1
+; RV64IM-NEXT: and t3, t3, s1
+; RV64IM-NEXT: slli s1, a0, 10
+; RV64IM-NEXT: addi t4, t4, -1
+; RV64IM-NEXT: and t4, t4, s1
+; RV64IM-NEXT: lui s1, 8
+; RV64IM-NEXT: slli a6, a6, 11
+; RV64IM-NEXT: and t5, a1, t5
+; RV64IM-NEXT: and t6, a1, t6
+; RV64IM-NEXT: and s0, a1, s0
+; RV64IM-NEXT: and s1, a1, s1
; RV64IM-NEXT: and a6, a1, a6
-; RV64IM-NEXT: and a3, a1, a3
-; RV64IM-NEXT: andi a1, a1, 1024
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: xor a1, a5, a1
-; RV64IM-NEXT: mul a5, a0, a7
-; RV64IM-NEXT: xor a4, a4, a5
-; RV64IM-NEXT: xor a1, a2, a1
-; RV64IM-NEXT: mul a2, a0, a6
-; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: andi a1, a1, 1
+; RV64IM-NEXT: seqz a1, a1
+; RV64IM-NEXT: addi a1, a1, -1
+; RV64IM-NEXT: mul t5, a0, t5
+; RV64IM-NEXT: mul t6, a0, t6
+; RV64IM-NEXT: mul s0, a0, s0
+; RV64IM-NEXT: mul s1, a0, s1
+; RV64IM-NEXT: and a1, a1, a0
+; RV64IM-NEXT: mul a0, a0, a6
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: xor a3, a3, a4
+; RV64IM-NEXT: xor a2, a5, a7
+; RV64IM-NEXT: xor a4, t1, t2
+; RV64IM-NEXT: xor a0, a0, t5
+; RV64IM-NEXT: xor a1, a1, a3
+; RV64IM-NEXT: xor a2, a2, t0
+; RV64IM-NEXT: xor a3, a4, t3
+; RV64IM-NEXT: xor a0, a0, t6
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: xor a2, a3, t4
+; RV64IM-NEXT: xor a0, a0, s0
; RV64IM-NEXT: xor a1, a1, a2
-; RV64IM-NEXT: mul a0, a0, a3
+; RV64IM-NEXT: xor a0, a0, s1
; RV64IM-NEXT: xor a0, a1, a0
; RV64IM-NEXT: srli a0, a0, 15
+; RV64IM-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 16
; RV64IM-NEXT: ret
;
; RV32IMZBS-LABEL: clmulr_i16:
@@ -2051,9 +565,9 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
; RV32IMZBS-NEXT: addi sp, sp, -16
; RV32IMZBS-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: slli a0, a0, 16
-; RV32IMZBS-NEXT: andi a2, a1, 2
-; RV32IMZBS-NEXT: andi a3, a1, 1
+; RV32IMZBS-NEXT: andi a3, a1, 2
; RV32IMZBS-NEXT: andi a4, a1, 4
; RV32IMZBS-NEXT: andi a5, a1, 8
; RV32IMZBS-NEXT: andi a6, a1, 16
@@ -2062,63 +576,103 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
; RV32IMZBS-NEXT: andi t1, a1, 128
; RV32IMZBS-NEXT: andi t2, a1, 256
; RV32IMZBS-NEXT: andi t3, a1, 512
-; RV32IMZBS-NEXT: bseti t4, zero, 11
-; RV32IMZBS-NEXT: lui t5, 1
-; RV32IMZBS-NEXT: lui t6, 2
-; RV32IMZBS-NEXT: lui s0, 4
-; RV32IMZBS-NEXT: lui s1, 8
-; RV32IMZBS-NEXT: and t4, a1, t4
-; RV32IMZBS-NEXT: and t5, a1, t5
-; RV32IMZBS-NEXT: and t6, a1, t6
-; RV32IMZBS-NEXT: and s0, a1, s0
-; RV32IMZBS-NEXT: and s1, a1, s1
-; RV32IMZBS-NEXT: andi a1, a1, 1024
+; RV32IMZBS-NEXT: andi t4, a1, 1024
+; RV32IMZBS-NEXT: not a2, a1
; RV32IMZBS-NEXT: srli a0, a0, 16
-; RV32IMZBS-NEXT: mul a2, a0, a2
-; RV32IMZBS-NEXT: mul a3, a0, a3
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: mul a6, a0, a6
-; RV32IMZBS-NEXT: mul a7, a0, a7
-; RV32IMZBS-NEXT: mul t0, a0, t0
-; RV32IMZBS-NEXT: mul t1, a0, t1
-; RV32IMZBS-NEXT: mul t2, a0, t2
-; RV32IMZBS-NEXT: mul t3, a0, t3
-; RV32IMZBS-NEXT: mul a1, a0, a1
-; RV32IMZBS-NEXT: mul t4, a0, t4
-; RV32IMZBS-NEXT: mul t5, a0, t5
-; RV32IMZBS-NEXT: mul t6, a0, t6
-; RV32IMZBS-NEXT: mul s0, a0, s0
-; RV32IMZBS-NEXT: mul a0, a0, s1
-; RV32IMZBS-NEXT: xor a2, a3, a2
+; RV32IMZBS-NEXT: seqz a3, a3
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: seqz a5, a5
+; RV32IMZBS-NEXT: seqz a6, a6
+; RV32IMZBS-NEXT: seqz a7, a7
+; RV32IMZBS-NEXT: seqz t0, t0
+; RV32IMZBS-NEXT: seqz t1, t1
+; RV32IMZBS-NEXT: seqz t2, t2
+; RV32IMZBS-NEXT: seqz t3, t3
+; RV32IMZBS-NEXT: seqz t4, t4
+; RV32IMZBS-NEXT: bexti t5, a2, 11
+; RV32IMZBS-NEXT: bexti t6, a2, 12
+; RV32IMZBS-NEXT: bexti s0, a2, 13
+; RV32IMZBS-NEXT: bexti s1, a2, 14
+; RV32IMZBS-NEXT: slli s2, a0, 1
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a3, a3, s2
+; RV32IMZBS-NEXT: slli s2, a0, 2
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a4, a4, s2
+; RV32IMZBS-NEXT: slli s2, a0, 3
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a5, a5, s2
+; RV32IMZBS-NEXT: slli s2, a0, 4
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and a6, a6, s2
+; RV32IMZBS-NEXT: slli s2, a0, 5
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: and a7, a7, s2
+; RV32IMZBS-NEXT: slli s2, a0, 6
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: and t0, t0, s2
+; RV32IMZBS-NEXT: slli s2, a0, 7
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: and t1, t1, s2
+; RV32IMZBS-NEXT: slli s2, a0, 8
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: and t2, t2, s2
+; RV32IMZBS-NEXT: slli s2, a0, 9
+; RV32IMZBS-NEXT: addi t3, t3, -1
+; RV32IMZBS-NEXT: and t3, t3, s2
+; RV32IMZBS-NEXT: slli s2, a0, 10
+; RV32IMZBS-NEXT: addi t4, t4, -1
+; RV32IMZBS-NEXT: and t4, t4, s2
+; RV32IMZBS-NEXT: slli s2, a0, 11
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and t5, t5, s2
+; RV32IMZBS-NEXT: slli s2, a0, 12
+; RV32IMZBS-NEXT: addi t6, t6, -1
+; RV32IMZBS-NEXT: and t6, t6, s2
+; RV32IMZBS-NEXT: slli s2, a0, 13
+; RV32IMZBS-NEXT: addi s0, s0, -1
+; RV32IMZBS-NEXT: and s0, s0, s2
+; RV32IMZBS-NEXT: slli s2, a0, 14
+; RV32IMZBS-NEXT: addi s1, s1, -1
+; RV32IMZBS-NEXT: and s1, s1, s2
+; RV32IMZBS-NEXT: andi a1, a1, 1
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: bexti a2, a2, 15
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a1, a1, a0
+; RV32IMZBS-NEXT: slli a0, a0, 15
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a0, a2, a0
+; RV32IMZBS-NEXT: xor a1, a1, a3
; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: xor a3, a6, a7
-; RV32IMZBS-NEXT: xor a5, t1, t2
-; RV32IMZBS-NEXT: xor a6, t4, t5
-; RV32IMZBS-NEXT: xor a2, a2, a4
-; RV32IMZBS-NEXT: xor a3, a3, t0
-; RV32IMZBS-NEXT: xor a4, a5, t3
-; RV32IMZBS-NEXT: xor a5, a6, t6
-; RV32IMZBS-NEXT: xor a2, a2, a3
-; RV32IMZBS-NEXT: xor a1, a4, a1
+; RV32IMZBS-NEXT: xor a2, a6, a7
+; RV32IMZBS-NEXT: xor a3, t1, t2
+; RV32IMZBS-NEXT: xor a5, t5, t6
+; RV32IMZBS-NEXT: xor a1, a1, a4
+; RV32IMZBS-NEXT: xor a2, a2, t0
+; RV32IMZBS-NEXT: xor a3, a3, t3
; RV32IMZBS-NEXT: xor a5, a5, s0
-; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: xor a2, a3, t4
+; RV32IMZBS-NEXT: xor a5, a5, s1
+; RV32IMZBS-NEXT: xor a1, a1, a2
; RV32IMZBS-NEXT: xor a0, a5, a0
; RV32IMZBS-NEXT: xor a0, a1, a0
; RV32IMZBS-NEXT: srli a0, a0, 15
; RV32IMZBS-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: addi sp, sp, 16
; RV32IMZBS-NEXT: ret
;
; RV64IMZBS-LABEL: clmulr_i16:
; RV64IMZBS: # %bb.0:
-; RV64IMZBS-NEXT: addi sp, sp, -16
-; RV64IMZBS-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s1, 0(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: addi sp, sp, -32
+; RV64IMZBS-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
; RV64IMZBS-NEXT: slli a0, a0, 48
-; RV64IMZBS-NEXT: andi a2, a1, 2
-; RV64IMZBS-NEXT: andi a3, a1, 1
+; RV64IMZBS-NEXT: andi a3, a1, 2
; RV64IMZBS-NEXT: andi a4, a1, 4
; RV64IMZBS-NEXT: andi a5, a1, 8
; RV64IMZBS-NEXT: andi a6, a1, 16
@@ -2127,53 +681,93 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
; RV64IMZBS-NEXT: andi t1, a1, 128
; RV64IMZBS-NEXT: andi t2, a1, 256
; RV64IMZBS-NEXT: andi t3, a1, 512
-; RV64IMZBS-NEXT: bseti t4, zero, 11
-; RV64IMZBS-NEXT: lui t5, 1
-; RV64IMZBS-NEXT: lui t6, 2
-; RV64IMZBS-NEXT: lui s0, 4
-; RV64IMZBS-NEXT: lui s1, 8
-; RV64IMZBS-NEXT: and t4, a1, t4
-; RV64IMZBS-NEXT: and t5, a1, t5
-; RV64IMZBS-NEXT: and t6, a1, t6
-; RV64IMZBS-NEXT: and s0, a1, s0
-; RV64IMZBS-NEXT: and s1, a1, s1
-; RV64IMZBS-NEXT: andi a1, a1, 1024
+; RV64IMZBS-NEXT: andi t4, a1, 1024
+; RV64IMZBS-NEXT: not a2, a1
; RV64IMZBS-NEXT: srli a0, a0, 48
-; RV64IMZBS-NEXT: mul a2, a0, a2
-; RV64IMZBS-NEXT: mul a3, a0, a3
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: mul a5, a0, a5
-; RV64IMZBS-NEXT: mul a6, a0, a6
-; RV64IMZBS-NEXT: mul a7, a0, a7
-; RV64IMZBS-NEXT: mul t0, a0, t0
-; RV64IMZBS-NEXT: mul t1, a0, t1
-; RV64IMZBS-NEXT: mul t2, a0, t2
-; RV64IMZBS-NEXT: mul t3, a0, t3
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: mul t4, a0, t4
-; RV64IMZBS-NEXT: mul t5, a0, t5
-; RV64IMZBS-NEXT: mul t6, a0, t6
-; RV64IMZBS-NEXT: mul s0, a0, s0
-; RV64IMZBS-NEXT: mul a0, a0, s1
-; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: seqz a4, a4
+; RV64IMZBS-NEXT: seqz a5, a5
+; RV64IMZBS-NEXT: seqz a6, a6
+; RV64IMZBS-NEXT: seqz a7, a7
+; RV64IMZBS-NEXT: seqz t0, t0
+; RV64IMZBS-NEXT: seqz t1, t1
+; RV64IMZBS-NEXT: seqz t2, t2
+; RV64IMZBS-NEXT: seqz t3, t3
+; RV64IMZBS-NEXT: seqz t4, t4
+; RV64IMZBS-NEXT: bexti t5, a2, 11
+; RV64IMZBS-NEXT: bexti t6, a2, 12
+; RV64IMZBS-NEXT: bexti s0, a2, 13
+; RV64IMZBS-NEXT: bexti s1, a2, 14
+; RV64IMZBS-NEXT: slli s2, a0, 1
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, s2
+; RV64IMZBS-NEXT: slli s2, a0, 2
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: and a4, a4, s2
+; RV64IMZBS-NEXT: slli s2, a0, 3
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a5, a5, s2
+; RV64IMZBS-NEXT: slli s2, a0, 4
+; RV64IMZBS-NEXT: addi a6, a6, -1
+; RV64IMZBS-NEXT: and a6, a6, s2
+; RV64IMZBS-NEXT: slli s2, a0, 5
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: and a7, a7, s2
+; RV64IMZBS-NEXT: slli s2, a0, 6
+; RV64IMZBS-NEXT: addi t0, t0, -1
+; RV64IMZBS-NEXT: and t0, t0, s2
+; RV64IMZBS-NEXT: slli s2, a0, 7
+; RV64IMZBS-NEXT: addi t1, t1, -1
+; RV64IMZBS-NEXT: and t1, t1, s2
+; RV64IMZBS-NEXT: slli s2, a0, 8
+; RV64IMZBS-NEXT: addi t2, t2, -1
+; RV64IMZBS-NEXT: and t2, t2, s2
+; RV64IMZBS-NEXT: slli s2, a0, 9
+; RV64IMZBS-NEXT: addi t3, t3, -1
+; RV64IMZBS-NEXT: and t3, t3, s2
+; RV64IMZBS-NEXT: slli s2, a0, 10
+; RV64IMZBS-NEXT: addi t4, t4, -1
+; RV64IMZBS-NEXT: and t4, t4, s2
+; RV64IMZBS-NEXT: slli s2, a0, 11
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: and t5, t5, s2
+; RV64IMZBS-NEXT: slli s2, a0, 12
+; RV64IMZBS-NEXT: addi t6, t6, -1
+; RV64IMZBS-NEXT: and t6, t6, s2
+; RV64IMZBS-NEXT: slli s2, a0, 13
+; RV64IMZBS-NEXT: addi s0, s0, -1
+; RV64IMZBS-NEXT: and s0, s0, s2
+; RV64IMZBS-NEXT: slli s2, a0, 14
+; RV64IMZBS-NEXT: addi s1, s1, -1
+; RV64IMZBS-NEXT: and s1, s1, s2
+; RV64IMZBS-NEXT: andi a1, a1, 1
+; RV64IMZBS-NEXT: seqz a1, a1
+; RV64IMZBS-NEXT: bexti a2, a2, 15
+; RV64IMZBS-NEXT: addi a1, a1, -1
+; RV64IMZBS-NEXT: and a1, a1, a0
+; RV64IMZBS-NEXT: slli a0, a0, 15
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: and a0, a2, a0
+; RV64IMZBS-NEXT: xor a1, a1, a3
; RV64IMZBS-NEXT: xor a4, a4, a5
-; RV64IMZBS-NEXT: xor a3, a6, a7
-; RV64IMZBS-NEXT: xor a5, t1, t2
-; RV64IMZBS-NEXT: xor a6, t4, t5
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: xor a3, a3, t0
-; RV64IMZBS-NEXT: xor a4, a5, t3
-; RV64IMZBS-NEXT: xor a5, a6, t6
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: xor a1, a4, a1
+; RV64IMZBS-NEXT: xor a2, a6, a7
+; RV64IMZBS-NEXT: xor a3, t1, t2
+; RV64IMZBS-NEXT: xor a5, t5, t6
+; RV64IMZBS-NEXT: xor a1, a1, a4
+; RV64IMZBS-NEXT: xor a2, a2, t0
+; RV64IMZBS-NEXT: xor a3, a3, t3
; RV64IMZBS-NEXT: xor a5, a5, s0
-; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: xor a1, a1, a2
+; RV64IMZBS-NEXT: xor a2, a3, t4
+; RV64IMZBS-NEXT: xor a5, a5, s1
+; RV64IMZBS-NEXT: xor a1, a1, a2
; RV64IMZBS-NEXT: xor a0, a5, a0
; RV64IMZBS-NEXT: xor a0, a1, a0
; RV64IMZBS-NEXT: srli a0, a0, 15
-; RV64IMZBS-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: addi sp, sp, 16
+; RV64IMZBS-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 32
; RV64IMZBS-NEXT: ret
%a.ext = zext i16 %a to i32
%b.ext = zext i16 %b to i32
@@ -2186,1485 +780,1567 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
define i32 @clmulr_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: clmulr_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -48
-; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: srli a2, a0, 8
-; RV32I-NEXT: lui s7, 16
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: slli a4, a0, 24
-; RV32I-NEXT: lui a5, 61681
-; RV32I-NEXT: srli a6, a1, 8
-; RV32I-NEXT: addi s3, s7, -256
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: and a6, a6, s3
-; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: lui a6, 209715
-; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: addi sp, sp, -96
+; RV32I-NEXT: sw ra, 92(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 88(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 84(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 80(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s8, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s9, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s10, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s11, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli s0, a0, 8
+; RV32I-NEXT: lui a6, 16
+; RV32I-NEXT: srli s1, a0, 24
+; RV32I-NEXT: slli s2, a0, 24
+; RV32I-NEXT: lui a3, 61681
+; RV32I-NEXT: lui a4, 209715
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: srli s6, a1, 8
+; RV32I-NEXT: srli s4, a1, 24
+; RV32I-NEXT: slli s5, a1, 24
+; RV32I-NEXT: li t5, 1
+; RV32I-NEXT: lui a7, 1
+; RV32I-NEXT: lui t0, 2
+; RV32I-NEXT: lui s11, 8
+; RV32I-NEXT: lui t3, 32
+; RV32I-NEXT: lui t4, 64
+; RV32I-NEXT: lui t2, 128
+; RV32I-NEXT: lui t6, 256
+; RV32I-NEXT: addi t1, a6, -256
+; RV32I-NEXT: addi s3, a3, -241
+; RV32I-NEXT: addi a4, a4, 819
+; RV32I-NEXT: addi a3, a2, 1365
+; RV32I-NEXT: slli a2, t5, 11
+; RV32I-NEXT: and t5, s0, t1
+; RV32I-NEXT: and a0, a0, t1
+; RV32I-NEXT: and s0, s6, t1
+; RV32I-NEXT: and a1, a1, t1
+; RV32I-NEXT: or t5, t5, s1
; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: slli a4, a1, 24
-; RV32I-NEXT: and a1, a1, s3
+; RV32I-NEXT: or s0, s0, s4
; RV32I-NEXT: slli a1, a1, 8
-; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: lui a4, 349525
-; RV32I-NEXT: addi s5, a5, -241
-; RV32I-NEXT: addi s4, a6, 819
-; RV32I-NEXT: addi s2, a4, 1365
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: srli a2, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: and a2, a2, s5
+; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: or a1, s5, a1
+; RV32I-NEXT: or a0, a0, t5
+; RV32I-NEXT: or a1, a1, s0
+; RV32I-NEXT: srli t5, a0, 4
+; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: srli s0, a1, 4
+; RV32I-NEXT: and a1, a1, s3
+; RV32I-NEXT: and t5, t5, s3
; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: and a3, a3, s5
+; RV32I-NEXT: and s0, s0, s3
; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: and a2, a2, s4
+; RV32I-NEXT: or a0, t5, a0
+; RV32I-NEXT: or a1, s0, a1
+; RV32I-NEXT: srli t5, a0, 2
+; RV32I-NEXT: sw a4, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a4
+; RV32I-NEXT: srli s0, a1, 2
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: and t5, t5, a4
; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, s4
+; RV32I-NEXT: and s0, s0, a4
; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 1
-; RV32I-NEXT: and a0, a0, s2
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, s2
-; RV32I-NEXT: and a2, a2, s2
+; RV32I-NEXT: or a0, t5, a0
+; RV32I-NEXT: or a1, s0, a1
+; RV32I-NEXT: srli t5, a0, 1
+; RV32I-NEXT: sw a3, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: srli s0, a1, 1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: and s1, t5, a3
; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, s2
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: or s0, a2, a0
-; RV32I-NEXT: or s6, a3, a1
-; RV32I-NEXT: andi a1, s6, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s6, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s8, a0, s1
-; RV32I-NEXT: andi a1, s6, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s6, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: andi a1, s6, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s6, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s6, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: andi a1, s6, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s6, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s6, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s6, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: and a1, s6, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and s0, s0, a3
+; RV32I-NEXT: slli t5, a1, 1
+; RV32I-NEXT: or a4, s1, a0
+; RV32I-NEXT: or a5, s0, t5
+; RV32I-NEXT: srli t5, t5, 31
+; RV32I-NEXT: slli s0, a4, 1
+; RV32I-NEXT: andi s1, a5, 2
+; RV32I-NEXT: slli a0, a4, 2
+; RV32I-NEXT: andi s4, a5, 4
+; RV32I-NEXT: slli s5, a4, 3
+; RV32I-NEXT: andi s6, a5, 8
+; RV32I-NEXT: slli s2, a4, 4
+; RV32I-NEXT: andi s7, a5, 16
+; RV32I-NEXT: slli s8, a4, 5
+; RV32I-NEXT: andi s10, a5, 32
+; RV32I-NEXT: slli s9, a4, 31
+; RV32I-NEXT: seqz t5, t5
+; RV32I-NEXT: addi t5, t5, -1
+; RV32I-NEXT: and a1, t5, s9
+; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli t5, a4, 6
+; RV32I-NEXT: seqz s1, s1
+; RV32I-NEXT: addi s1, s1, -1
+; RV32I-NEXT: and s0, s1, s0
+; RV32I-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi s0, a5, 64
+; RV32I-NEXT: seqz s4, s4
+; RV32I-NEXT: addi s4, s4, -1
+; RV32I-NEXT: and a0, s4, a0
+; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s1, a4, 7
+; RV32I-NEXT: seqz s4, s6
+; RV32I-NEXT: addi s4, s4, -1
+; RV32I-NEXT: and a0, s4, s5
+; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi ra, a5, 128
+; RV32I-NEXT: seqz s5, s7
+; RV32I-NEXT: addi s5, s5, -1
+; RV32I-NEXT: and a0, s5, s2
+; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a0, a4, 8
+; RV32I-NEXT: seqz s6, s10
+; RV32I-NEXT: addi s6, s6, -1
+; RV32I-NEXT: and a1, s6, s8
+; RV32I-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi s10, a5, 256
+; RV32I-NEXT: seqz s0, s0
+; RV32I-NEXT: addi s0, s0, -1
+; RV32I-NEXT: and a1, s0, t5
+; RV32I-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli t5, a4, 9
+; RV32I-NEXT: seqz s0, ra
+; RV32I-NEXT: addi s0, s0, -1
+; RV32I-NEXT: and s0, s0, s1
+; RV32I-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi s0, a5, 512
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: and s5, s10, a0
+; RV32I-NEXT: slli a0, a4, 10
+; RV32I-NEXT: seqz s0, s0
+; RV32I-NEXT: addi s0, s0, -1
+; RV32I-NEXT: and s10, s0, t5
+; RV32I-NEXT: andi t5, a5, 1024
+; RV32I-NEXT: seqz t5, t5
+; RV32I-NEXT: addi t5, t5, -1
+; RV32I-NEXT: and a0, t5, a0
+; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli t5, a4, 11
+; RV32I-NEXT: and s0, a5, a2
+; RV32I-NEXT: seqz s0, s0
+; RV32I-NEXT: addi s0, s0, -1
+; RV32I-NEXT: and s4, s0, t5
+; RV32I-NEXT: slli t5, a4, 12
+; RV32I-NEXT: and a7, a5, a7
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: and ra, a7, t5
+; RV32I-NEXT: slli a7, a4, 13
+; RV32I-NEXT: and t0, a5, t0
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: and t5, t0, a7
+; RV32I-NEXT: slli a7, a4, 14
; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: and a1, s6, s7
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lui a1, 1024
-; RV32I-NEXT: xor s7, s8, a0
-; RV32I-NEXT: and a1, s6, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: and t0, a5, a0
+; RV32I-NEXT: seqz t0, t0
+; RV32I-NEXT: addi t0, t0, -1
+; RV32I-NEXT: and s1, t0, a7
+; RV32I-NEXT: slli a7, a4, 15
+; RV32I-NEXT: and a2, a5, s11
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and s0, a2, a7
+; RV32I-NEXT: slli a2, a4, 16
+; RV32I-NEXT: and a7, a5, a6
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: and t0, a7, a2
+; RV32I-NEXT: slli a2, a4, 17
+; RV32I-NEXT: and a7, a5, t3
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: and a7, a7, a2
+; RV32I-NEXT: slli a2, a4, 18
+; RV32I-NEXT: and t3, a5, t4
+; RV32I-NEXT: seqz t3, t3
+; RV32I-NEXT: addi t3, t3, -1
+; RV32I-NEXT: and t3, t3, a2
+; RV32I-NEXT: slli a2, a4, 19
+; RV32I-NEXT: and t2, a5, t2
+; RV32I-NEXT: seqz t2, t2
+; RV32I-NEXT: addi t2, t2, -1
+; RV32I-NEXT: and t2, t2, a2
+; RV32I-NEXT: slli a2, a4, 20
+; RV32I-NEXT: and t4, a5, t6
+; RV32I-NEXT: seqz t4, t4
+; RV32I-NEXT: addi t4, t4, -1
+; RV32I-NEXT: and t4, t4, a2
+; RV32I-NEXT: lui a2, 512
+; RV32I-NEXT: and a2, a5, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli t6, a4, 21
+; RV32I-NEXT: and t6, a2, t6
+; RV32I-NEXT: lui a2, 1024
+; RV32I-NEXT: and a2, a5, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a0, a4, 22
+; RV32I-NEXT: and a6, a2, a0
; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lui a1, 131072
-; RV32I-NEXT: xor s7, s7, a0
-; RV32I-NEXT: and a1, s6, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s6, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor a0, s7, a0
+; RV32I-NEXT: and a0, a5, a0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli s2, a4, 23
+; RV32I-NEXT: and a2, a0, s2
+; RV32I-NEXT: lui s2, 4096
+; RV32I-NEXT: and s2, a5, s2
+; RV32I-NEXT: seqz s2, s2
+; RV32I-NEXT: addi s2, s2, -1
+; RV32I-NEXT: slli s7, a4, 24
+; RV32I-NEXT: and s2, s2, s7
+; RV32I-NEXT: lui s7, 8192
+; RV32I-NEXT: and s7, a5, s7
+; RV32I-NEXT: seqz s7, s7
+; RV32I-NEXT: addi s7, s7, -1
+; RV32I-NEXT: slli s8, a4, 25
+; RV32I-NEXT: and s7, s7, s8
+; RV32I-NEXT: lui s8, 16384
+; RV32I-NEXT: and s8, a5, s8
+; RV32I-NEXT: seqz s8, s8
+; RV32I-NEXT: addi s8, s8, -1
+; RV32I-NEXT: slli s9, a4, 26
+; RV32I-NEXT: and s8, s8, s9
+; RV32I-NEXT: lui s9, 32768
+; RV32I-NEXT: and s9, a5, s9
+; RV32I-NEXT: seqz s9, s9
+; RV32I-NEXT: addi s9, s9, -1
+; RV32I-NEXT: slli s11, a4, 27
+; RV32I-NEXT: and s9, s9, s11
+; RV32I-NEXT: lui s11, 65536
+; RV32I-NEXT: and s11, a5, s11
+; RV32I-NEXT: seqz s11, s11
+; RV32I-NEXT: addi s11, s11, -1
+; RV32I-NEXT: slli a1, a4, 28
+; RV32I-NEXT: and a1, s11, a1
+; RV32I-NEXT: lui s11, 131072
+; RV32I-NEXT: and s11, a5, s11
+; RV32I-NEXT: seqz s11, s11
+; RV32I-NEXT: addi s11, s11, -1
+; RV32I-NEXT: slli a3, a4, 29
+; RV32I-NEXT: and a3, s11, a3
+; RV32I-NEXT: lui s11, 262144
+; RV32I-NEXT: and s11, a5, s11
+; RV32I-NEXT: andi a5, a5, 1
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: and a5, a5, a4
+; RV32I-NEXT: slli a4, a4, 30
+; RV32I-NEXT: seqz s11, s11
+; RV32I-NEXT: addi s11, s11, -1
+; RV32I-NEXT: and a4, s11, a4
+; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a0
+; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, a0, s11
+; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, s6
+; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, s6, s5
+; RV32I-NEXT: xor s4, s4, ra
+; RV32I-NEXT: xor a7, t0, a7
+; RV32I-NEXT: xor a2, a6, a2
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: xor a4, a5, s11
+; RV32I-NEXT: lw a5, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a5
+; RV32I-NEXT: xor a5, s5, s10
+; RV32I-NEXT: xor a6, s4, t5
+; RV32I-NEXT: xor a7, a7, t3
+; RV32I-NEXT: xor a2, a2, s2
+; RV32I-NEXT: lw t0, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, t0
+; RV32I-NEXT: xor a0, a4, a0
+; RV32I-NEXT: lw a4, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a5, a4
+; RV32I-NEXT: xor a5, a6, s1
+; RV32I-NEXT: xor a6, a7, t2
+; RV32I-NEXT: xor a2, a2, s7
+; RV32I-NEXT: xor a0, a0, a4
+; RV32I-NEXT: xor a5, a5, s0
+; RV32I-NEXT: xor a4, a6, t4
+; RV32I-NEXT: xor a2, a2, s8
+; RV32I-NEXT: xor a0, a0, a5
+; RV32I-NEXT: xor a4, a4, t6
+; RV32I-NEXT: xor a2, a2, s9
+; RV32I-NEXT: xor a0, a0, a4
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a0, a0, a3
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: and a3, a0, s3
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: slli a3, a3, 8
+; RV32I-NEXT: slli a3, a0, 24
+; RV32I-NEXT: and a0, a0, t1
+; RV32I-NEXT: and a1, a1, t1
+; RV32I-NEXT: slli a0, a0, 8
; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a1, a1, s5
+; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: and a1, a1, s3
; RV32I-NEXT: slli a0, a0, 4
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: lw a2, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: slli a0, a0, 2
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s2
-; RV32I-NEXT: and a1, a1, s2
+; RV32I-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: slli a0, a0, 1
; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: lw ra, 92(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 88(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 80(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
;
; RV64I-LABEL: clmulr_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi sp, sp, -160
+; RV64I-NEXT: sd ra, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli s0, a0, 32
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, a0, s2
-; RV64I-NEXT: andi a1, s1, 4
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 8
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 16
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 32
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 64
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s1, 128
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, s1, 256
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 512
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: andi a1, s1, 1024
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: li a1, 1
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: slli a1, a1, 11
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 32
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 64
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 128
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 256
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 512
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 1024
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 2048
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 4096
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 8192
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 16384
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 32768
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s2, a0
-; RV64I-NEXT: lui a1, 65536
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: lui a1, 131072
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: lui a1, 262144
-; RV64I-NEXT: and a1, s1, a1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: srliw a1, s1, 31
-; RV64I-NEXT: slli a1, a1, 31
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s3, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor s2, s2, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: xor a0, s2, a0
+; RV64I-NEXT: andi a2, a1, 2
+; RV64I-NEXT: andi a3, a1, 1
+; RV64I-NEXT: andi a4, a1, 4
+; RV64I-NEXT: andi a5, a1, 8
+; RV64I-NEXT: andi a6, a1, 16
+; RV64I-NEXT: andi a7, a1, 32
+; RV64I-NEXT: andi t0, a1, 64
+; RV64I-NEXT: andi t1, a1, 128
+; RV64I-NEXT: andi t3, a1, 256
+; RV64I-NEXT: andi t4, a1, 512
+; RV64I-NEXT: andi t5, a1, 1024
+; RV64I-NEXT: lui s0, 16
+; RV64I-NEXT: lui s2, 32
+; RV64I-NEXT: lui s8, 64
+; RV64I-NEXT: lui s5, 128
+; RV64I-NEXT: lui s1, 256
+; RV64I-NEXT: lui t6, 512
+; RV64I-NEXT: lui s3, 1024
+; RV64I-NEXT: lui t2, 2048
+; RV64I-NEXT: lui s11, 4096
+; RV64I-NEXT: srli s4, a0, 31
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and s4, a2, s4
+; RV64I-NEXT: srli a2, a0, 32
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and s6, a3, a2
+; RV64I-NEXT: srli a2, a0, 30
+; RV64I-NEXT: seqz a3, a4
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and s7, a3, a2
+; RV64I-NEXT: srli a2, a0, 29
+; RV64I-NEXT: seqz a3, a5
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and s9, a3, a2
+; RV64I-NEXT: srli a2, a0, 28
+; RV64I-NEXT: seqz a3, a6
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and s10, a3, a2
+; RV64I-NEXT: srli a2, a0, 27
+; RV64I-NEXT: seqz a3, a7
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and ra, a3, a2
+; RV64I-NEXT: srli a2, a0, 26
+; RV64I-NEXT: seqz a3, t0
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srli a2, a0, 25
+; RV64I-NEXT: seqz a3, t1
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and t1, a3, a2
+; RV64I-NEXT: srli a2, a0, 24
+; RV64I-NEXT: seqz a3, t3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a7, a3, a2
+; RV64I-NEXT: srli a2, a0, 23
+; RV64I-NEXT: seqz a3, t4
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srli a2, a0, 22
+; RV64I-NEXT: seqz a3, t5
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui t0, 8192
+; RV64I-NEXT: li a2, 1
+; RV64I-NEXT: slli a2, a2, 11
+; RV64I-NEXT: lui a3, 1
+; RV64I-NEXT: and a3, a1, a3
+; RV64I-NEXT: lui a4, 2
+; RV64I-NEXT: and a4, a1, a4
+; RV64I-NEXT: lui a5, 4
+; RV64I-NEXT: and a5, a1, a5
+; RV64I-NEXT: lui a6, 8
+; RV64I-NEXT: and a6, a1, a6
+; RV64I-NEXT: and s0, a1, s0
+; RV64I-NEXT: and s2, a1, s2
+; RV64I-NEXT: and s8, a1, s8
+; RV64I-NEXT: and s5, a1, s5
+; RV64I-NEXT: and s1, a1, s1
+; RV64I-NEXT: and t6, a1, t6
+; RV64I-NEXT: and t4, a1, s3
+; RV64I-NEXT: and s3, a1, t2
+; RV64I-NEXT: and t2, a1, s11
+; RV64I-NEXT: sd t2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and t0, a1, t0
+; RV64I-NEXT: sd t0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui t0, 16384
+; RV64I-NEXT: and t0, a1, t0
+; RV64I-NEXT: sd t0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui t0, 32768
+; RV64I-NEXT: and s11, a1, t0
+; RV64I-NEXT: lui t0, 65536
+; RV64I-NEXT: and t3, a1, t0
+; RV64I-NEXT: lui t0, 131072
+; RV64I-NEXT: and t5, a1, t0
+; RV64I-NEXT: lui t0, 262144
+; RV64I-NEXT: and t0, a1, t0
+; RV64I-NEXT: and t2, a1, a2
+; RV64I-NEXT: sraiw a1, a1, 31
+; RV64I-NEXT: seqz a1, a1
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: srli a2, a0, 1
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sd a1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: xor s4, s6, s4
+; RV64I-NEXT: xor s6, s7, s9
+; RV64I-NEXT: xor s7, s10, ra
+; RV64I-NEXT: xor s9, t1, a7
+; RV64I-NEXT: seqz a1, a3
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: srli a2, a0, 20
+; RV64I-NEXT: and a7, a1, a2
+; RV64I-NEXT: seqz a1, a4
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: srli a2, a0, 19
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: seqz a2, a5
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: srli a3, a0, 18
+; RV64I-NEXT: and s10, a2, a3
+; RV64I-NEXT: seqz a2, a6
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: srli a3, a0, 17
+; RV64I-NEXT: and ra, a2, a3
+; RV64I-NEXT: seqz a2, s0
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: and a3, a2, a3
+; RV64I-NEXT: seqz a2, s2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: srli a4, a0, 15
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: seqz a4, s8
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: srli s0, a0, 14
+; RV64I-NEXT: and s0, a4, s0
+; RV64I-NEXT: seqz a4, s5
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: srli a5, a0, 13
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: seqz a5, s1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: srli a6, a0, 12
+; RV64I-NEXT: and a5, a5, a6
+; RV64I-NEXT: seqz a6, t6
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: srli t1, a0, 11
+; RV64I-NEXT: and a6, a6, t1
+; RV64I-NEXT: seqz t1, t4
+; RV64I-NEXT: addi t1, t1, -1
+; RV64I-NEXT: srli t4, a0, 10
+; RV64I-NEXT: and t1, t1, t4
+; RV64I-NEXT: seqz t4, s3
+; RV64I-NEXT: addi t4, t4, -1
+; RV64I-NEXT: srli t6, a0, 9
+; RV64I-NEXT: and t4, t4, t6
+; RV64I-NEXT: ld t6, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz t6, t6
+; RV64I-NEXT: addi t6, t6, -1
+; RV64I-NEXT: srli s1, a0, 8
+; RV64I-NEXT: and t6, t6, s1
+; RV64I-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz s1, s1
+; RV64I-NEXT: addi s1, s1, -1
+; RV64I-NEXT: srli s2, a0, 7
+; RV64I-NEXT: and s1, s1, s2
+; RV64I-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: seqz s2, s2
+; RV64I-NEXT: addi s2, s2, -1
+; RV64I-NEXT: srli s3, a0, 6
+; RV64I-NEXT: and s2, s2, s3
+; RV64I-NEXT: seqz s3, s11
+; RV64I-NEXT: addi s3, s3, -1
+; RV64I-NEXT: srli s5, a0, 5
+; RV64I-NEXT: and s3, s3, s5
+; RV64I-NEXT: seqz t3, t3
+; RV64I-NEXT: addi t3, t3, -1
+; RV64I-NEXT: srli s5, a0, 4
+; RV64I-NEXT: and t3, t3, s5
+; RV64I-NEXT: seqz t5, t5
+; RV64I-NEXT: addi t5, t5, -1
+; RV64I-NEXT: srli s5, a0, 3
+; RV64I-NEXT: and t5, t5, s5
+; RV64I-NEXT: seqz t0, t0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: srli s5, a0, 2
+; RV64I-NEXT: and t0, t0, s5
+; RV64I-NEXT: xor s4, s4, s6
+; RV64I-NEXT: ld s5, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s5, s7, s5
+; RV64I-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s6, s9, s6
+; RV64I-NEXT: srli a0, a0, 21
+; RV64I-NEXT: seqz t2, t2
+; RV64I-NEXT: addi t2, t2, -1
+; RV64I-NEXT: and a0, t2, a0
+; RV64I-NEXT: xor t2, s10, ra
+; RV64I-NEXT: xor a4, a4, a5
+; RV64I-NEXT: xor a5, s1, s2
+; RV64I-NEXT: xor s1, s4, s5
+; RV64I-NEXT: ld s2, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s2, s6, s2
+; RV64I-NEXT: xor a0, a0, a7
+; RV64I-NEXT: xor a3, t2, a3
+; RV64I-NEXT: xor a4, a4, a6
+; RV64I-NEXT: xor a5, a5, s3
+; RV64I-NEXT: xor a6, s1, s2
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: xor a1, a4, t1
+; RV64I-NEXT: xor a3, a5, t3
+; RV64I-NEXT: xor a0, a6, a0
+; RV64I-NEXT: xor a2, a2, s0
+; RV64I-NEXT: xor a1, a1, t4
+; RV64I-NEXT: xor a3, a3, t5
+; RV64I-NEXT: xor a0, a0, a2
+; RV64I-NEXT: xor a1, a1, t6
+; RV64I-NEXT: xor a2, a3, t0
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a0, a0, 31
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32IM-LABEL: clmulr_i32:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -144
-; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli t0, a0, 8
-; RV32IM-NEXT: lui a3, 16
-; RV32IM-NEXT: srli t1, a0, 24
+; RV32IM-NEXT: addi sp, sp, -80
+; RV32IM-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s2, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s3, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s4, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s5, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s6, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s7, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s8, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s9, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli a7, a0, 8
+; RV32IM-NEXT: lui a5, 16
+; RV32IM-NEXT: srli t0, a0, 24
; RV32IM-NEXT: slli a2, a0, 24
-; RV32IM-NEXT: lui t3, 61681
-; RV32IM-NEXT: lui t5, 209715
-; RV32IM-NEXT: lui t6, 349525
-; RV32IM-NEXT: srli t4, a1, 8
-; RV32IM-NEXT: srli a4, a1, 24
-; RV32IM-NEXT: slli a5, a1, 24
-; RV32IM-NEXT: li s7, 1
-; RV32IM-NEXT: lui t2, 4
-; RV32IM-NEXT: lui s0, 8
-; RV32IM-NEXT: lui s1, 32
-; RV32IM-NEXT: lui s2, 64
-; RV32IM-NEXT: lui s3, 128
-; RV32IM-NEXT: lui s4, 256
-; RV32IM-NEXT: lui s8, 512
-; RV32IM-NEXT: lui a7, 1024
-; RV32IM-NEXT: lui s9, 2048
+; RV32IM-NEXT: lui a3, 61681
+; RV32IM-NEXT: lui t1, 209715
+; RV32IM-NEXT: lui t3, 349525
+; RV32IM-NEXT: srli s0, a1, 8
+; RV32IM-NEXT: srli t4, a1, 24
+; RV32IM-NEXT: slli t6, a1, 24
+; RV32IM-NEXT: li ra, 1
+; RV32IM-NEXT: lui s7, 1
+; RV32IM-NEXT: lui t5, 4
+; RV32IM-NEXT: lui s1, 8
+; RV32IM-NEXT: lui s2, 32
+; RV32IM-NEXT: lui s8, 64
+; RV32IM-NEXT: lui s9, 128
+; RV32IM-NEXT: lui s3, 256
+; RV32IM-NEXT: lui s4, 512
+; RV32IM-NEXT: lui s5, 1024
+; RV32IM-NEXT: lui s6, 2048
; RV32IM-NEXT: lui s10, 4096
; RV32IM-NEXT: lui s11, 8192
-; RV32IM-NEXT: lui ra, 16384
-; RV32IM-NEXT: addi s5, a3, -256
-; RV32IM-NEXT: and t0, t0, s5
-; RV32IM-NEXT: or t1, t0, t1
-; RV32IM-NEXT: lui a6, 32768
-; RV32IM-NEXT: and t4, t4, s5
-; RV32IM-NEXT: or a4, t4, a4
-; RV32IM-NEXT: lui t0, 65536
-; RV32IM-NEXT: and a0, a0, s5
+; RV32IM-NEXT: addi a6, a5, -256
+; RV32IM-NEXT: lui a4, 16
+; RV32IM-NEXT: addi a5, a3, -241
+; RV32IM-NEXT: addi t1, t1, 819
+; RV32IM-NEXT: addi a3, t3, 1365
+; RV32IM-NEXT: slli ra, ra, 11
+; RV32IM-NEXT: and a7, a7, a6
+; RV32IM-NEXT: and a0, a0, a6
+; RV32IM-NEXT: and t3, s0, a6
+; RV32IM-NEXT: and a1, a1, a6
+; RV32IM-NEXT: or a7, a7, t0
; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or a0, a2, a0
-; RV32IM-NEXT: lui a2, 131072
-; RV32IM-NEXT: and a1, a1, s5
+; RV32IM-NEXT: or t0, t3, t4
; RV32IM-NEXT: slli a1, a1, 8
-; RV32IM-NEXT: or t4, a5, a1
-; RV32IM-NEXT: lui a1, 262144
-; RV32IM-NEXT: or a0, a0, t1
-; RV32IM-NEXT: lui a5, 524288
-; RV32IM-NEXT: addi t3, t3, -241
-; RV32IM-NEXT: addi t5, t5, 819
-; RV32IM-NEXT: addi t6, t6, 1365
-; RV32IM-NEXT: slli s7, s7, 11
-; RV32IM-NEXT: or a4, t4, a4
-; RV32IM-NEXT: srli t4, a0, 4
-; RV32IM-NEXT: and a0, a0, t3
-; RV32IM-NEXT: and t4, t4, t3
+; RV32IM-NEXT: or a0, a2, a0
+; RV32IM-NEXT: or a1, t6, a1
+; RV32IM-NEXT: or a0, a0, a7
+; RV32IM-NEXT: or a1, a1, t0
+; RV32IM-NEXT: srli a7, a0, 4
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: srli t0, a1, 4
+; RV32IM-NEXT: and a1, a1, a5
+; RV32IM-NEXT: and a7, a7, a5
; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: or a0, t4, a0
-; RV32IM-NEXT: srli t4, a4, 4
-; RV32IM-NEXT: and a4, a4, t3
-; RV32IM-NEXT: and t4, t4, t3
-; RV32IM-NEXT: slli a4, a4, 4
-; RV32IM-NEXT: or a4, t4, a4
-; RV32IM-NEXT: srli t4, a0, 2
-; RV32IM-NEXT: and a0, a0, t5
-; RV32IM-NEXT: and t4, t4, t5
+; RV32IM-NEXT: and t0, t0, a5
+; RV32IM-NEXT: slli a1, a1, 4
+; RV32IM-NEXT: or a0, a7, a0
+; RV32IM-NEXT: or a1, t0, a1
+; RV32IM-NEXT: srli a7, a0, 2
+; RV32IM-NEXT: and a0, a0, t1
+; RV32IM-NEXT: srli t0, a1, 2
+; RV32IM-NEXT: and a1, a1, t1
+; RV32IM-NEXT: and a7, a7, t1
; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: or a0, t4, a0
-; RV32IM-NEXT: srli t4, a4, 2
-; RV32IM-NEXT: and a4, a4, t5
-; RV32IM-NEXT: and t4, t4, t5
-; RV32IM-NEXT: slli a4, a4, 2
-; RV32IM-NEXT: or t4, t4, a4
-; RV32IM-NEXT: srli a4, a0, 1
-; RV32IM-NEXT: and a0, a0, t6
-; RV32IM-NEXT: and a4, a4, t6
+; RV32IM-NEXT: and t0, t0, t1
+; RV32IM-NEXT: slli a1, a1, 2
+; RV32IM-NEXT: or a0, a7, a0
+; RV32IM-NEXT: or a1, t0, a1
+; RV32IM-NEXT: srli a7, a0, 1
+; RV32IM-NEXT: sw a3, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a3
+; RV32IM-NEXT: srli t0, a1, 1
+; RV32IM-NEXT: and a1, a1, a3
+; RV32IM-NEXT: and a7, a7, a3
; RV32IM-NEXT: slli a0, a0, 1
-; RV32IM-NEXT: or a4, a4, a0
-; RV32IM-NEXT: srli a0, t4, 1
-; RV32IM-NEXT: and t4, t4, t6
-; RV32IM-NEXT: and a0, a0, t6
-; RV32IM-NEXT: slli t4, t4, 1
-; RV32IM-NEXT: or a0, a0, t4
-; RV32IM-NEXT: andi t4, a0, 2
-; RV32IM-NEXT: and s6, a0, s7
-; RV32IM-NEXT: lui t1, 1
-; RV32IM-NEXT: and t1, a0, t1
-; RV32IM-NEXT: sw t1, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t1, 2
-; RV32IM-NEXT: and t1, a0, t1
-; RV32IM-NEXT: sw t1, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t1, a0, t2
-; RV32IM-NEXT: sw t1, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s0, a0, s0
-; RV32IM-NEXT: and a3, a0, a3
-; RV32IM-NEXT: sw a3, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s1, a0, s1
-; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a0, s2
-; RV32IM-NEXT: sw a3, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a0, s3
-; RV32IM-NEXT: and a3, a0, s4
-; RV32IM-NEXT: sw a3, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a0, s8
-; RV32IM-NEXT: sw a3, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a0, a7
-; RV32IM-NEXT: sw a3, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s9, a0, s9
-; RV32IM-NEXT: and a3, a0, s10
-; RV32IM-NEXT: sw a3, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a0, s11
-; RV32IM-NEXT: sw a3, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a0, ra
-; RV32IM-NEXT: sw a3, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a0, a6
-; RV32IM-NEXT: sw a3, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a3, a0, t0
-; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a2, a0, a2
-; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a1, a0, a1
-; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a5, a0, a5
-; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a1, a0, 1
-; RV32IM-NEXT: andi a2, a0, 4
-; RV32IM-NEXT: andi a3, a0, 8
-; RV32IM-NEXT: andi a5, a0, 16
-; RV32IM-NEXT: andi a6, a0, 32
-; RV32IM-NEXT: andi a7, a0, 64
-; RV32IM-NEXT: andi t0, a0, 128
-; RV32IM-NEXT: andi t1, a0, 256
-; RV32IM-NEXT: andi t2, a0, 512
-; RV32IM-NEXT: andi a0, a0, 1024
-; RV32IM-NEXT: mul t4, a4, t4
-; RV32IM-NEXT: sw t4, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul ra, a4, a1
-; RV32IM-NEXT: mul s11, a4, a2
-; RV32IM-NEXT: mul s8, a4, a3
-; RV32IM-NEXT: mul s7, a4, a5
-; RV32IM-NEXT: mul s4, a4, a6
-; RV32IM-NEXT: mul a1, a4, a7
-; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a4, t0
-; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s2, a4, t1
-; RV32IM-NEXT: mul t2, a4, t2
-; RV32IM-NEXT: mul a0, a4, a0
-; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a4, s6
-; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a4, a0
-; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t1, a4, a0
-; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a7, a4, a0
-; RV32IM-NEXT: mul s1, a4, s0
-; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a4, a0
-; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a4, a0
-; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a4, a0
-; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a4, s3
-; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a2, a4, a0
-; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a6, a4, a0
-; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t4, a4, a0
-; RV32IM-NEXT: mul s6, a4, s9
-; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a4, a0
-; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a4, a0
-; RV32IM-NEXT: lw a5, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a5, a4, a5
-; RV32IM-NEXT: lw t0, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t0, a4, t0
-; RV32IM-NEXT: lw s0, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s0, a4, s0
-; RV32IM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s3, a4, s3
-; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s9, a4, s9
-; RV32IM-NEXT: lw s10, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a4, a4, s10
-; RV32IM-NEXT: lw s10, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor ra, ra, s10
-; RV32IM-NEXT: xor s8, s11, s8
-; RV32IM-NEXT: xor s4, s7, s4
-; RV32IM-NEXT: xor t2, s2, t2
-; RV32IM-NEXT: xor a7, t1, a7
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: xor a1, ra, s8
-; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, s4, a3
-; RV32IM-NEXT: lw t1, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t2, t1
-; RV32IM-NEXT: xor a7, a7, s1
-; RV32IM-NEXT: xor a2, a2, a6
-; RV32IM-NEXT: xor a0, a0, a5
-; RV32IM-NEXT: xor a1, a1, a3
+; RV32IM-NEXT: and t0, t0, a3
+; RV32IM-NEXT: slli a1, a1, 1
+; RV32IM-NEXT: or a0, a7, a0
+; RV32IM-NEXT: or a2, t0, a1
+; RV32IM-NEXT: slli t3, a0, 1
+; RV32IM-NEXT: andi t4, a2, 2
+; RV32IM-NEXT: slli t6, a0, 2
+; RV32IM-NEXT: andi s0, a2, 4
+; RV32IM-NEXT: slli a1, a0, 3
+; RV32IM-NEXT: andi a7, a2, 8
+; RV32IM-NEXT: slli a3, a0, 4
+; RV32IM-NEXT: and t0, a2, ra
+; RV32IM-NEXT: and t2, a2, s7
+; RV32IM-NEXT: mul t0, a0, t0
+; RV32IM-NEXT: mul t2, a0, t2
+; RV32IM-NEXT: xor t0, t0, t2
+; RV32IM-NEXT: sw t0, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi s7, a2, 16
+; RV32IM-NEXT: and t0, a2, s8
+; RV32IM-NEXT: and t2, a2, s9
+; RV32IM-NEXT: mul t0, a0, t0
+; RV32IM-NEXT: mul t2, a0, t2
+; RV32IM-NEXT: xor t0, t0, t2
+; RV32IM-NEXT: sw t0, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli s8, a0, 5
+; RV32IM-NEXT: and t0, a2, s10
+; RV32IM-NEXT: and s9, a2, s11
+; RV32IM-NEXT: mul t0, a0, t0
+; RV32IM-NEXT: mul s9, a0, s9
+; RV32IM-NEXT: xor t0, t0, s9
+; RV32IM-NEXT: sw t0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi s9, a2, 32
+; RV32IM-NEXT: seqz t4, t4
+; RV32IM-NEXT: addi t4, t4, -1
+; RV32IM-NEXT: and t0, t4, t3
+; RV32IM-NEXT: sw t0, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli s10, a0, 6
+; RV32IM-NEXT: seqz t4, s0
+; RV32IM-NEXT: addi t4, t4, -1
+; RV32IM-NEXT: and t0, t4, t6
+; RV32IM-NEXT: sw t0, 4(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi s11, a2, 64
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: and t6, a7, a1
+; RV32IM-NEXT: slli a1, a0, 7
+; RV32IM-NEXT: seqz a7, s7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: and s0, a7, a3
+; RV32IM-NEXT: andi a3, a2, 128
+; RV32IM-NEXT: seqz a7, s9
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: and s8, a7, s8
+; RV32IM-NEXT: slli a7, a0, 8
+; RV32IM-NEXT: seqz s7, s11
+; RV32IM-NEXT: addi s7, s7, -1
+; RV32IM-NEXT: and s7, s7, s10
+; RV32IM-NEXT: andi s10, a2, 256
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and s9, a3, a1
+; RV32IM-NEXT: slli a1, a0, 9
+; RV32IM-NEXT: seqz a3, s10
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and s11, a3, a7
+; RV32IM-NEXT: andi a3, a2, 512
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and s10, a3, a1
+; RV32IM-NEXT: lui a1, 16384
+; RV32IM-NEXT: lui a3, 2
+; RV32IM-NEXT: and a3, a2, a3
+; RV32IM-NEXT: and a7, a2, t5
+; RV32IM-NEXT: and s1, a2, s1
+; RV32IM-NEXT: and t5, a2, a4
+; RV32IM-NEXT: and s2, a2, s2
+; RV32IM-NEXT: and s3, a2, s3
+; RV32IM-NEXT: and s4, a2, s4
+; RV32IM-NEXT: and s5, a2, s5
+; RV32IM-NEXT: and s6, a2, s6
+; RV32IM-NEXT: and a4, a2, a1
+; RV32IM-NEXT: lui ra, 32768
+; RV32IM-NEXT: and ra, a2, ra
+; RV32IM-NEXT: lui t2, 65536
+; RV32IM-NEXT: and t2, a2, t2
+; RV32IM-NEXT: lui t0, 131072
+; RV32IM-NEXT: and t0, a2, t0
+; RV32IM-NEXT: lui t3, 262144
+; RV32IM-NEXT: and t3, a2, t3
+; RV32IM-NEXT: lui t4, 524288
+; RV32IM-NEXT: and t4, a2, t4
+; RV32IM-NEXT: andi a1, a2, 1
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: mul a3, a0, a3
+; RV32IM-NEXT: mul a7, a0, a7
+; RV32IM-NEXT: mul s1, a0, s1
+; RV32IM-NEXT: mul t5, a0, t5
+; RV32IM-NEXT: mul s2, a0, s2
+; RV32IM-NEXT: mul s3, a0, s3
+; RV32IM-NEXT: mul s4, a0, s4
+; RV32IM-NEXT: mul s5, a0, s5
+; RV32IM-NEXT: mul s6, a0, s6
+; RV32IM-NEXT: mul a4, a0, a4
+; RV32IM-NEXT: mul ra, a0, ra
+; RV32IM-NEXT: mul t2, a0, t2
+; RV32IM-NEXT: mul t0, a0, t0
+; RV32IM-NEXT: mul t3, a0, t3
+; RV32IM-NEXT: mul t4, a0, t4
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: and a1, a1, a0
+; RV32IM-NEXT: slli a0, a0, 10
+; RV32IM-NEXT: andi a2, a2, 1024
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: and a0, a2, a0
+; RV32IM-NEXT: lw a2, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a3
; RV32IM-NEXT: lw a3, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, t1, a3
-; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a7, a5
-; RV32IM-NEXT: xor a2, a2, t4
-; RV32IM-NEXT: xor a0, a0, t0
-; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a6
-; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a6
-; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: xor a2, a2, s6
-; RV32IM-NEXT: xor a0, a0, s0
-; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: xor a0, a0, s3
-; RV32IM-NEXT: xor a3, a1, a3
-; RV32IM-NEXT: slli a1, a1, 24
-; RV32IM-NEXT: xor a3, a3, a5
-; RV32IM-NEXT: xor a0, a0, s9
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: xor a0, a0, a4
-; RV32IM-NEXT: and a3, a2, s5
-; RV32IM-NEXT: srli a4, a2, 8
-; RV32IM-NEXT: xor a0, a2, a0
-; RV32IM-NEXT: slli a3, a3, 8
-; RV32IM-NEXT: and a2, a4, s5
+; RV32IM-NEXT: xor a3, a3, s3
+; RV32IM-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, s3, a4
+; RV32IM-NEXT: lw s3, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, s3
+; RV32IM-NEXT: lw s3, 4(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s3, t6
+; RV32IM-NEXT: xor s0, s0, s8
+; RV32IM-NEXT: xor s3, s9, s11
+; RV32IM-NEXT: xor a2, a2, a7
+; RV32IM-NEXT: xor a3, a3, s4
+; RV32IM-NEXT: xor a4, a4, ra
+; RV32IM-NEXT: xor a1, a1, t6
+; RV32IM-NEXT: xor a7, s0, s7
+; RV32IM-NEXT: xor t6, s3, s10
+; RV32IM-NEXT: xor a2, a2, s1
+; RV32IM-NEXT: xor a3, a3, s5
+; RV32IM-NEXT: xor a4, a4, t2
+; RV32IM-NEXT: xor a1, a1, a7
+; RV32IM-NEXT: xor a0, t6, a0
+; RV32IM-NEXT: xor a2, a2, t5
+; RV32IM-NEXT: xor a3, a3, s6
+; RV32IM-NEXT: xor a4, a4, t0
+; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: xor a1, a2, s2
+; RV32IM-NEXT: xor a2, a4, t3
+; RV32IM-NEXT: slli a4, a0, 24
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: xor a1, a2, t4
+; RV32IM-NEXT: xor a0, a0, a3
+; RV32IM-NEXT: and a2, a0, a6
+; RV32IM-NEXT: srli a3, a0, 8
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: slli a2, a2, 8
+; RV32IM-NEXT: and a1, a3, a6
; RV32IM-NEXT: srli a0, a0, 24
-; RV32IM-NEXT: or a1, a1, a3
-; RV32IM-NEXT: or a0, a2, a0
+; RV32IM-NEXT: or a2, a4, a2
; RV32IM-NEXT: or a0, a1, a0
+; RV32IM-NEXT: or a0, a2, a0
; RV32IM-NEXT: srli a1, a0, 4
-; RV32IM-NEXT: and a0, a0, t3
-; RV32IM-NEXT: and a1, a1, t3
+; RV32IM-NEXT: and a0, a0, a5
+; RV32IM-NEXT: and a1, a1, a5
; RV32IM-NEXT: slli a0, a0, 4
; RV32IM-NEXT: or a0, a1, a0
; RV32IM-NEXT: srli a1, a0, 2
-; RV32IM-NEXT: and a0, a0, t5
-; RV32IM-NEXT: and a1, a1, t5
+; RV32IM-NEXT: and a0, a0, t1
+; RV32IM-NEXT: and a1, a1, t1
; RV32IM-NEXT: slli a0, a0, 2
; RV32IM-NEXT: or a0, a1, a0
; RV32IM-NEXT: srli a1, a0, 1
-; RV32IM-NEXT: and a0, a0, t6
-; RV32IM-NEXT: and a1, a1, t6
+; RV32IM-NEXT: lw a2, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: and a1, a1, a2
; RV32IM-NEXT: slli a0, a0, 1
; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 144
+; RV32IM-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s2, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s3, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s4, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s6, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s7, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s8, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s9, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s11, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 80
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: clmulr_i32:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: addi sp, sp, -128
-; RV64IM-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli a6, a0, 32
-; RV64IM-NEXT: andi t1, a1, 2
-; RV64IM-NEXT: andi t3, a1, 1
-; RV64IM-NEXT: andi a5, a1, 4
-; RV64IM-NEXT: andi a7, a1, 8
-; RV64IM-NEXT: andi a3, a1, 16
-; RV64IM-NEXT: andi a4, a1, 32
-; RV64IM-NEXT: andi a0, a1, 64
-; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi t0, a1, 128
-; RV64IM-NEXT: andi t2, a1, 256
-; RV64IM-NEXT: andi a0, a1, 512
-; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: li a2, 1
-; RV64IM-NEXT: lui t5, 1
-; RV64IM-NEXT: lui t6, 2
-; RV64IM-NEXT: lui s0, 4
-; RV64IM-NEXT: lui s2, 8
-; RV64IM-NEXT: lui s3, 16
-; RV64IM-NEXT: lui s4, 32
-; RV64IM-NEXT: lui s5, 64
-; RV64IM-NEXT: lui s6, 128
-; RV64IM-NEXT: lui s7, 256
-; RV64IM-NEXT: lui s8, 512
-; RV64IM-NEXT: lui s9, 1024
-; RV64IM-NEXT: lui s10, 2048
-; RV64IM-NEXT: lui s11, 4096
-; RV64IM-NEXT: lui ra, 8192
-; RV64IM-NEXT: lui a0, 16384
-; RV64IM-NEXT: srli s1, a6, 32
-; RV64IM-NEXT: mul a6, s1, t1
-; RV64IM-NEXT: mul t1, s1, t3
-; RV64IM-NEXT: xor a6, t1, a6
-; RV64IM-NEXT: sd a6, 0(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui t1, 32768
-; RV64IM-NEXT: mul a5, s1, a5
-; RV64IM-NEXT: mul a7, s1, a7
-; RV64IM-NEXT: xor t4, a5, a7
-; RV64IM-NEXT: lui a7, 65536
-; RV64IM-NEXT: mul a3, s1, a3
-; RV64IM-NEXT: mul a4, s1, a4
-; RV64IM-NEXT: xor a6, a3, a4
-; RV64IM-NEXT: lui t3, 131072
-; RV64IM-NEXT: mul a4, s1, t0
-; RV64IM-NEXT: mul t0, s1, t2
-; RV64IM-NEXT: xor a5, a4, t0
-; RV64IM-NEXT: lui t0, 262144
-; RV64IM-NEXT: slli t2, a2, 11
-; RV64IM-NEXT: and t5, a1, t5
-; RV64IM-NEXT: and t6, a1, t6
+; RV64IM-NEXT: addi sp, sp, -16
+; RV64IM-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 0(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a0, a0, 32
+; RV64IM-NEXT: andi a2, a1, 2
+; RV64IM-NEXT: andi a3, a1, 4
+; RV64IM-NEXT: andi a4, a1, 8
+; RV64IM-NEXT: andi a6, a1, 16
+; RV64IM-NEXT: andi a7, a1, 32
+; RV64IM-NEXT: andi t1, a1, 64
+; RV64IM-NEXT: andi t2, a1, 128
+; RV64IM-NEXT: andi t3, a1, 256
+; RV64IM-NEXT: andi t4, a1, 512
+; RV64IM-NEXT: andi t6, a1, 1024
+; RV64IM-NEXT: lui t5, 16
+; RV64IM-NEXT: lui s0, 32
+; RV64IM-NEXT: srli a5, a0, 31
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: and a5, a2, a5
+; RV64IM-NEXT: srli a2, a0, 30
+; RV64IM-NEXT: seqz a3, a3
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and t0, a3, a2
+; RV64IM-NEXT: srli a2, a0, 29
+; RV64IM-NEXT: seqz a3, a4
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and a4, a3, a2
+; RV64IM-NEXT: srli a2, a0, 28
+; RV64IM-NEXT: seqz a3, a6
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and a6, a3, a2
+; RV64IM-NEXT: srli a2, a0, 27
+; RV64IM-NEXT: seqz a3, a7
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and s1, a3, a2
+; RV64IM-NEXT: srli a2, a0, 26
+; RV64IM-NEXT: seqz a3, t1
+; RV64IM-NEXT: addi a3, a3, -1
+; RV64IM-NEXT: and a2, a3, a2
+; RV64IM-NEXT: srli a3, a0, 25
+; RV64IM-NEXT: seqz a7, t2
+; RV64IM-NEXT: addi a7, a7, -1
+; RV64IM-NEXT: and t1, a7, a3
+; RV64IM-NEXT: srli a3, a0, 24
+; RV64IM-NEXT: seqz a7, t3
+; RV64IM-NEXT: addi a7, a7, -1
+; RV64IM-NEXT: and t2, a7, a3
+; RV64IM-NEXT: srli a3, a0, 23
+; RV64IM-NEXT: seqz a7, t4
+; RV64IM-NEXT: addi a7, a7, -1
+; RV64IM-NEXT: and a7, a7, a3
+; RV64IM-NEXT: srli a3, a0, 22
+; RV64IM-NEXT: seqz t3, t6
+; RV64IM-NEXT: addi t3, t3, -1
+; RV64IM-NEXT: and a3, t3, a3
+; RV64IM-NEXT: lui t3, 2048
+; RV64IM-NEXT: srli a0, a0, 32
+; RV64IM-NEXT: and t4, a1, t5
; RV64IM-NEXT: and s0, a1, s0
-; RV64IM-NEXT: and s2, a1, s2
-; RV64IM-NEXT: and s3, a1, s3
-; RV64IM-NEXT: and s4, a1, s4
-; RV64IM-NEXT: and s5, a1, s5
-; RV64IM-NEXT: and s6, a1, s6
-; RV64IM-NEXT: and s7, a1, s7
-; RV64IM-NEXT: and s8, a1, s8
-; RV64IM-NEXT: and s9, a1, s9
-; RV64IM-NEXT: and s10, a1, s10
-; RV64IM-NEXT: and s11, a1, s11
-; RV64IM-NEXT: and ra, a1, ra
-; RV64IM-NEXT: and a2, a1, a0
-; RV64IM-NEXT: and t1, a1, t1
-; RV64IM-NEXT: and a7, a1, a7
+; RV64IM-NEXT: mul t4, a0, t4
+; RV64IM-NEXT: mul t5, a0, s0
+; RV64IM-NEXT: xor t4, t4, t5
+; RV64IM-NEXT: lui t5, 4096
; RV64IM-NEXT: and t3, a1, t3
+; RV64IM-NEXT: and t5, a1, t5
+; RV64IM-NEXT: mul t3, a0, t3
+; RV64IM-NEXT: mul t5, a0, t5
+; RV64IM-NEXT: xor t3, t3, t5
+; RV64IM-NEXT: andi t5, a1, 1
+; RV64IM-NEXT: seqz t5, t5
+; RV64IM-NEXT: addi t5, t5, -1
+; RV64IM-NEXT: and t5, t5, a0
+; RV64IM-NEXT: xor a5, t5, a5
+; RV64IM-NEXT: xor a4, t0, a4
+; RV64IM-NEXT: xor a6, a6, s1
+; RV64IM-NEXT: li t0, 1
+; RV64IM-NEXT: xor t1, t1, t2
+; RV64IM-NEXT: lui t2, 1
+; RV64IM-NEXT: slli t0, t0, 11
+; RV64IM-NEXT: and t2, a1, t2
; RV64IM-NEXT: and t0, a1, t0
+; RV64IM-NEXT: mul t2, a0, t2
+; RV64IM-NEXT: mul t0, a0, t0
+; RV64IM-NEXT: xor t0, t0, t2
+; RV64IM-NEXT: lui t2, 64
; RV64IM-NEXT: and t2, a1, t2
-; RV64IM-NEXT: andi a0, a1, 1024
-; RV64IM-NEXT: srliw a1, a1, 31
-; RV64IM-NEXT: slli a1, a1, 31
-; RV64IM-NEXT: ld a3, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a3, s1, a3
-; RV64IM-NEXT: ld a4, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a4, s1, a4
-; RV64IM-NEXT: mul a0, s1, a0
-; RV64IM-NEXT: mul t5, s1, t5
-; RV64IM-NEXT: mul t6, s1, t6
-; RV64IM-NEXT: mul s0, s1, s0
-; RV64IM-NEXT: mul s2, s1, s2
-; RV64IM-NEXT: mul s3, s1, s3
-; RV64IM-NEXT: mul s4, s1, s4
-; RV64IM-NEXT: mul s5, s1, s5
-; RV64IM-NEXT: mul s6, s1, s6
-; RV64IM-NEXT: mul s7, s1, s7
-; RV64IM-NEXT: mul s8, s1, s8
-; RV64IM-NEXT: mul s9, s1, s9
-; RV64IM-NEXT: mul s10, s1, s10
-; RV64IM-NEXT: mul s11, s1, s11
-; RV64IM-NEXT: mul ra, s1, ra
-; RV64IM-NEXT: mul a2, s1, a2
-; RV64IM-NEXT: mul t1, s1, t1
-; RV64IM-NEXT: mul a7, s1, a7
-; RV64IM-NEXT: mul t3, s1, t3
-; RV64IM-NEXT: mul t0, s1, t0
-; RV64IM-NEXT: mul a1, s1, a1
-; RV64IM-NEXT: mul t2, s1, t2
-; RV64IM-NEXT: xor s1, s2, s3
-; RV64IM-NEXT: xor s2, s8, s9
-; RV64IM-NEXT: xor a7, a7, t3
-; RV64IM-NEXT: ld t3, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul t2, a0, t2
+; RV64IM-NEXT: xor t2, t4, t2
+; RV64IM-NEXT: lui t4, 8192
+; RV64IM-NEXT: and t4, a1, t4
+; RV64IM-NEXT: mul t4, a0, t4
; RV64IM-NEXT: xor t3, t3, t4
-; RV64IM-NEXT: xor a3, a6, a3
; RV64IM-NEXT: xor a4, a5, a4
-; RV64IM-NEXT: xor a5, t2, t5
-; RV64IM-NEXT: xor a6, s1, s4
-; RV64IM-NEXT: xor t2, s2, s10
-; RV64IM-NEXT: xor a7, a7, t0
-; RV64IM-NEXT: xor a3, t3, a3
-; RV64IM-NEXT: xor a0, a4, a0
-; RV64IM-NEXT: xor a4, a5, t6
-; RV64IM-NEXT: xor a5, a6, s5
-; RV64IM-NEXT: xor a6, t2, s11
-; RV64IM-NEXT: xor a0, a3, a0
-; RV64IM-NEXT: xor a4, a4, s0
-; RV64IM-NEXT: xor a3, a5, s6
-; RV64IM-NEXT: xor a5, a6, ra
-; RV64IM-NEXT: xor a0, a0, a4
-; RV64IM-NEXT: xor a3, a3, s7
-; RV64IM-NEXT: xor a2, a5, a2
-; RV64IM-NEXT: xor a0, a0, a3
-; RV64IM-NEXT: xor a2, a2, t1
-; RV64IM-NEXT: xor a0, a0, a2
-; RV64IM-NEXT: xor a1, a7, a1
-; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: xor a2, a6, a2
+; RV64IM-NEXT: xor a5, t1, a7
+; RV64IM-NEXT: lui a6, 2
+; RV64IM-NEXT: and a6, a1, a6
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: xor a6, t0, a6
+; RV64IM-NEXT: lui a7, 128
+; RV64IM-NEXT: and a7, a1, a7
+; RV64IM-NEXT: mul a7, a0, a7
+; RV64IM-NEXT: xor a7, t2, a7
+; RV64IM-NEXT: lui t0, 16384
+; RV64IM-NEXT: and t0, a1, t0
+; RV64IM-NEXT: mul t0, a0, t0
+; RV64IM-NEXT: xor t0, t3, t0
+; RV64IM-NEXT: xor a2, a4, a2
+; RV64IM-NEXT: xor a3, a5, a3
+; RV64IM-NEXT: lui a4, 4
+; RV64IM-NEXT: and a4, a1, a4
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: xor a4, a6, a4
+; RV64IM-NEXT: lui a5, 256
+; RV64IM-NEXT: and a5, a1, a5
+; RV64IM-NEXT: mul a5, a0, a5
+; RV64IM-NEXT: xor a5, a7, a5
+; RV64IM-NEXT: lui a6, 32768
+; RV64IM-NEXT: and a6, a1, a6
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: xor a6, t0, a6
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: lui a3, 8
+; RV64IM-NEXT: and a3, a1, a3
+; RV64IM-NEXT: mul a3, a0, a3
+; RV64IM-NEXT: xor a3, a4, a3
+; RV64IM-NEXT: lui a4, 512
+; RV64IM-NEXT: and a4, a1, a4
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: xor a4, a5, a4
+; RV64IM-NEXT: lui a5, 65536
+; RV64IM-NEXT: and a5, a1, a5
+; RV64IM-NEXT: mul a5, a0, a5
+; RV64IM-NEXT: xor a5, a6, a5
+; RV64IM-NEXT: lui a6, 1024
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: lui a3, 131072
+; RV64IM-NEXT: and a6, a1, a6
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: xor a4, a4, a6
+; RV64IM-NEXT: lui a6, 262144
+; RV64IM-NEXT: and a3, a1, a3
+; RV64IM-NEXT: and a6, a1, a6
+; RV64IM-NEXT: srliw a1, a1, 31
+; RV64IM-NEXT: slli a1, a1, 31
+; RV64IM-NEXT: mul a3, a0, a3
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: xor a3, a5, a3
+; RV64IM-NEXT: xor a2, a2, a4
+; RV64IM-NEXT: xor a3, a3, a6
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: mul a0, a0, a1
+; RV64IM-NEXT: xor a0, a2, a0
; RV64IM-NEXT: srli a0, a0, 31
-; RV64IM-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: addi sp, sp, 128
+; RV64IM-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 16
; RV64IM-NEXT: ret
;
; RV32IMZBS-LABEL: clmulr_i32:
; RV32IMZBS: # %bb.0:
-; RV32IMZBS-NEXT: addi sp, sp, -144
-; RV32IMZBS-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli t0, a0, 8
+; RV32IMZBS-NEXT: addi sp, sp, -96
+; RV32IMZBS-NEXT: sw ra, 92(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s0, 88(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s1, 84(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s2, 80(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s3, 76(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s4, 72(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s5, 68(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s6, 64(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s7, 60(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s8, 56(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s9, 52(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s10, 48(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw s11, 44(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: srli a2, a0, 8
; RV32IMZBS-NEXT: lui a3, 16
-; RV32IMZBS-NEXT: srli t1, a0, 24
-; RV32IMZBS-NEXT: slli a2, a0, 24
-; RV32IMZBS-NEXT: lui t3, 61681
-; RV32IMZBS-NEXT: lui t4, 209715
-; RV32IMZBS-NEXT: lui a4, 349525
-; RV32IMZBS-NEXT: srli t5, a1, 8
-; RV32IMZBS-NEXT: srli t6, a1, 24
-; RV32IMZBS-NEXT: slli a7, a1, 24
-; RV32IMZBS-NEXT: bseti a6, zero, 11
-; RV32IMZBS-NEXT: lui a5, 2
-; RV32IMZBS-NEXT: lui s0, 4
-; RV32IMZBS-NEXT: lui s1, 8
-; RV32IMZBS-NEXT: lui s2, 32
-; RV32IMZBS-NEXT: lui s3, 64
-; RV32IMZBS-NEXT: lui s4, 128
-; RV32IMZBS-NEXT: lui s6, 256
-; RV32IMZBS-NEXT: lui s7, 512
-; RV32IMZBS-NEXT: lui s8, 1024
-; RV32IMZBS-NEXT: lui s9, 2048
-; RV32IMZBS-NEXT: lui s10, 4096
-; RV32IMZBS-NEXT: lui s11, 8192
-; RV32IMZBS-NEXT: lui ra, 16384
-; RV32IMZBS-NEXT: addi s5, a3, -256
-; RV32IMZBS-NEXT: lui t2, 16
-; RV32IMZBS-NEXT: and t0, t0, s5
-; RV32IMZBS-NEXT: or t1, t0, t1
-; RV32IMZBS-NEXT: and t5, t5, s5
-; RV32IMZBS-NEXT: or t6, t5, t6
-; RV32IMZBS-NEXT: lui t0, 65536
-; RV32IMZBS-NEXT: and a0, a0, s5
+; RV32IMZBS-NEXT: srli a7, a0, 24
+; RV32IMZBS-NEXT: slli t0, a0, 24
+; RV32IMZBS-NEXT: lui a4, 61681
+; RV32IMZBS-NEXT: lui a6, 209715
+; RV32IMZBS-NEXT: lui t1, 349525
+; RV32IMZBS-NEXT: srli t2, a1, 8
+; RV32IMZBS-NEXT: srli t3, a1, 24
+; RV32IMZBS-NEXT: slli t4, a1, 24
+; RV32IMZBS-NEXT: addi t5, a3, -256
+; RV32IMZBS-NEXT: sw t5, 40(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: addi a5, a4, -241
+; RV32IMZBS-NEXT: sw a5, 36(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: addi a4, a6, 819
+; RV32IMZBS-NEXT: addi a3, t1, 1365
+; RV32IMZBS-NEXT: and a2, a2, t5
+; RV32IMZBS-NEXT: and a0, a0, t5
+; RV32IMZBS-NEXT: and t1, t2, t5
+; RV32IMZBS-NEXT: and a1, a1, t5
+; RV32IMZBS-NEXT: or a2, a2, a7
; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a0, a2, a0
-; RV32IMZBS-NEXT: lui a3, 131072
-; RV32IMZBS-NEXT: and a1, a1, s5
+; RV32IMZBS-NEXT: or a7, t1, t3
; RV32IMZBS-NEXT: slli a1, a1, 8
-; RV32IMZBS-NEXT: or a1, a7, a1
-; RV32IMZBS-NEXT: lui a2, 262144
-; RV32IMZBS-NEXT: or a0, a0, t1
-; RV32IMZBS-NEXT: lui a7, 524288
-; RV32IMZBS-NEXT: addi t3, t3, -241
-; RV32IMZBS-NEXT: addi t4, t4, 819
-; RV32IMZBS-NEXT: addi t5, a4, 1365
-; RV32IMZBS-NEXT: or a1, a1, t6
-; RV32IMZBS-NEXT: srli a4, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, t3
-; RV32IMZBS-NEXT: and a4, a4, t3
+; RV32IMZBS-NEXT: or a0, t0, a0
+; RV32IMZBS-NEXT: or a1, t4, a1
+; RV32IMZBS-NEXT: or a0, a0, a2
+; RV32IMZBS-NEXT: or a1, a1, a7
+; RV32IMZBS-NEXT: srli a2, a0, 4
+; RV32IMZBS-NEXT: and a0, a0, a5
+; RV32IMZBS-NEXT: srli a7, a1, 4
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: and a2, a2, a5
; RV32IMZBS-NEXT: slli a0, a0, 4
-; RV32IMZBS-NEXT: or a0, a4, a0
-; RV32IMZBS-NEXT: srli a4, a1, 4
-; RV32IMZBS-NEXT: and a1, a1, t3
-; RV32IMZBS-NEXT: and a4, a4, t3
+; RV32IMZBS-NEXT: and a7, a7, a5
; RV32IMZBS-NEXT: slli a1, a1, 4
-; RV32IMZBS-NEXT: or a1, a4, a1
-; RV32IMZBS-NEXT: srli a4, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, t4
-; RV32IMZBS-NEXT: and a4, a4, t4
+; RV32IMZBS-NEXT: or a0, a2, a0
+; RV32IMZBS-NEXT: or a1, a7, a1
+; RV32IMZBS-NEXT: srli a2, a0, 2
+; RV32IMZBS-NEXT: sw a4, 32(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a4
+; RV32IMZBS-NEXT: srli a7, a1, 2
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: and a2, a2, a4
; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: or a0, a4, a0
-; RV32IMZBS-NEXT: srli a4, a1, 2
-; RV32IMZBS-NEXT: and a1, a1, t4
-; RV32IMZBS-NEXT: and a4, a4, t4
+; RV32IMZBS-NEXT: and a7, a7, a4
; RV32IMZBS-NEXT: slli a1, a1, 2
-; RV32IMZBS-NEXT: or a1, a4, a1
-; RV32IMZBS-NEXT: srli a4, a0, 1
-; RV32IMZBS-NEXT: and a0, a0, t5
-; RV32IMZBS-NEXT: and a4, a4, t5
+; RV32IMZBS-NEXT: or a0, a2, a0
+; RV32IMZBS-NEXT: or a1, a7, a1
+; RV32IMZBS-NEXT: srli a2, a0, 1
+; RV32IMZBS-NEXT: sw a3, 28(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a3
+; RV32IMZBS-NEXT: srli a7, a1, 1
+; RV32IMZBS-NEXT: and a1, a1, a3
+; RV32IMZBS-NEXT: and t0, a2, a3
; RV32IMZBS-NEXT: slli a0, a0, 1
-; RV32IMZBS-NEXT: or a4, a4, a0
-; RV32IMZBS-NEXT: srli a0, a1, 1
-; RV32IMZBS-NEXT: and a1, a1, t5
-; RV32IMZBS-NEXT: and a0, a0, t5
-; RV32IMZBS-NEXT: slli a1, a1, 1
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: andi t6, a0, 2
-; RV32IMZBS-NEXT: and a1, a0, a6
-; RV32IMZBS-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 1
-; RV32IMZBS-NEXT: and a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a5, a0, a5
-; RV32IMZBS-NEXT: sw a5, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s0, a0, s0
-; RV32IMZBS-NEXT: and s1, a0, s1
-; RV32IMZBS-NEXT: and a1, a0, t2
-; RV32IMZBS-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a1, a0, s2
-; RV32IMZBS-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s3, a0, s3
-; RV32IMZBS-NEXT: and a1, a0, s4
-; RV32IMZBS-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s6, a0, s6
-; RV32IMZBS-NEXT: and a1, a0, s7
-; RV32IMZBS-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a1, a0, s8
-; RV32IMZBS-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s9, a0, s9
-; RV32IMZBS-NEXT: and a1, a0, s10
-; RV32IMZBS-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a1, a0, s11
-; RV32IMZBS-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a1, a0, ra
-; RV32IMZBS-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 32768
-; RV32IMZBS-NEXT: and a1, a0, a1
-; RV32IMZBS-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a1, a0, t0
-; RV32IMZBS-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a3, a0, a3
-; RV32IMZBS-NEXT: sw a3, 32(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a2, a0, a2
-; RV32IMZBS-NEXT: sw a2, 28(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a1, a0, a7
-; RV32IMZBS-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a1, a0, 1
-; RV32IMZBS-NEXT: andi a2, a0, 4
-; RV32IMZBS-NEXT: andi a3, a0, 8
-; RV32IMZBS-NEXT: andi a5, a0, 16
-; RV32IMZBS-NEXT: andi a6, a0, 32
-; RV32IMZBS-NEXT: andi a7, a0, 64
-; RV32IMZBS-NEXT: andi t0, a0, 128
-; RV32IMZBS-NEXT: andi t1, a0, 256
-; RV32IMZBS-NEXT: andi t2, a0, 512
-; RV32IMZBS-NEXT: andi a0, a0, 1024
-; RV32IMZBS-NEXT: mul t6, a4, t6
-; RV32IMZBS-NEXT: sw t6, 12(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul ra, a4, a1
-; RV32IMZBS-NEXT: mul s11, a4, a2
-; RV32IMZBS-NEXT: mul s8, a4, a3
-; RV32IMZBS-NEXT: mul s7, a4, a5
-; RV32IMZBS-NEXT: mul s4, a4, a6
-; RV32IMZBS-NEXT: mul a1, a4, a7
-; RV32IMZBS-NEXT: sw a1, 16(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a1, a4, t0
-; RV32IMZBS-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s2, a4, t1
-; RV32IMZBS-NEXT: mul t2, a4, t2
-; RV32IMZBS-NEXT: mul a0, a4, a0
-; RV32IMZBS-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a4, a0
+; RV32IMZBS-NEXT: and a3, a7, a3
+; RV32IMZBS-NEXT: slli a2, a1, 1
+; RV32IMZBS-NEXT: or s9, t0, a0
+; RV32IMZBS-NEXT: or a1, a3, a2
+; RV32IMZBS-NEXT: srli a2, a2, 31
+; RV32IMZBS-NEXT: slli a0, s9, 1
+; RV32IMZBS-NEXT: andi t0, a1, 2
+; RV32IMZBS-NEXT: slli t2, s9, 2
+; RV32IMZBS-NEXT: andi t3, a1, 4
+; RV32IMZBS-NEXT: slli t4, s9, 3
+; RV32IMZBS-NEXT: andi t5, a1, 8
+; RV32IMZBS-NEXT: slli t6, s9, 4
+; RV32IMZBS-NEXT: andi s0, a1, 16
+; RV32IMZBS-NEXT: slli a7, s9, 5
+; RV32IMZBS-NEXT: andi s2, a1, 32
+; RV32IMZBS-NEXT: slli s3, s9, 6
+; RV32IMZBS-NEXT: andi s4, a1, 64
+; RV32IMZBS-NEXT: slli s5, s9, 7
+; RV32IMZBS-NEXT: andi s6, a1, 128
+; RV32IMZBS-NEXT: slli s1, s9, 8
+; RV32IMZBS-NEXT: andi s7, a1, 256
+; RV32IMZBS-NEXT: slli s8, s9, 9
+; RV32IMZBS-NEXT: andi s10, a1, 512
+; RV32IMZBS-NEXT: slli s11, s9, 10
+; RV32IMZBS-NEXT: andi ra, a1, 1024
+; RV32IMZBS-NEXT: slli t1, s9, 31
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a2, a2, t1
+; RV32IMZBS-NEXT: sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, s9, 11
+; RV32IMZBS-NEXT: seqz t0, t0
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: and a0, t0, a0
; RV32IMZBS-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a4, a0
-; RV32IMZBS-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t1, a4, a0
-; RV32IMZBS-NEXT: mul a7, a4, s0
-; RV32IMZBS-NEXT: mul s1, a4, s1
-; RV32IMZBS-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a4, a0
-; RV32IMZBS-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a4, a0
-; RV32IMZBS-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a4, s3
-; RV32IMZBS-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a4, a0
-; RV32IMZBS-NEXT: mul a2, a4, s6
-; RV32IMZBS-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a6, a4, a0
-; RV32IMZBS-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t6, a4, a0
-; RV32IMZBS-NEXT: mul s6, a4, s9
-; RV32IMZBS-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a1, a4, a0
-; RV32IMZBS-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a4, a0
-; RV32IMZBS-NEXT: lw a5, 44(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a5, a4, a5
-; RV32IMZBS-NEXT: lw t0, 40(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t0, a4, t0
-; RV32IMZBS-NEXT: lw s0, 36(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s0, a4, s0
-; RV32IMZBS-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s3, a4, s3
-; RV32IMZBS-NEXT: lw s9, 28(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s9, a4, s9
-; RV32IMZBS-NEXT: lw s10, 24(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a4, a4, s10
-; RV32IMZBS-NEXT: lw s10, 12(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor ra, ra, s10
-; RV32IMZBS-NEXT: xor s8, s11, s8
-; RV32IMZBS-NEXT: xor s4, s7, s4
-; RV32IMZBS-NEXT: xor t2, s2, t2
-; RV32IMZBS-NEXT: xor a7, t1, a7
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: xor a1, ra, s8
-; RV32IMZBS-NEXT: lw a3, 16(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, s4, a3
-; RV32IMZBS-NEXT: lw t1, 8(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t2, t1
-; RV32IMZBS-NEXT: xor a7, a7, s1
-; RV32IMZBS-NEXT: xor a2, a2, a6
-; RV32IMZBS-NEXT: xor a0, a0, a5
-; RV32IMZBS-NEXT: xor a1, a1, a3
-; RV32IMZBS-NEXT: lw a3, 20(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, t1, a3
-; RV32IMZBS-NEXT: lw a5, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a7, a5
-; RV32IMZBS-NEXT: xor a2, a2, t6
-; RV32IMZBS-NEXT: xor a0, a0, t0
-; RV32IMZBS-NEXT: lw a6, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a6
-; RV32IMZBS-NEXT: lw a6, 84(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a6
-; RV32IMZBS-NEXT: lw a6, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: xor a2, a2, s6
-; RV32IMZBS-NEXT: xor a0, a0, s0
-; RV32IMZBS-NEXT: lw a6, 80(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: not t1, a1
+; RV32IMZBS-NEXT: seqz t0, t3
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: and a0, t0, t2
+; RV32IMZBS-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s9, 12
+; RV32IMZBS-NEXT: seqz t2, t5
+; RV32IMZBS-NEXT: addi t2, t2, -1
+; RV32IMZBS-NEXT: and a0, t2, t4
+; RV32IMZBS-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s9, 13
+; RV32IMZBS-NEXT: seqz t3, s0
+; RV32IMZBS-NEXT: addi t3, t3, -1
+; RV32IMZBS-NEXT: and a0, t3, t6
+; RV32IMZBS-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, s9, 14
+; RV32IMZBS-NEXT: seqz t4, s2
+; RV32IMZBS-NEXT: addi t4, t4, -1
+; RV32IMZBS-NEXT: and t5, t4, a7
+; RV32IMZBS-NEXT: slli a2, s9, 15
+; RV32IMZBS-NEXT: seqz t4, s4
+; RV32IMZBS-NEXT: addi t4, t4, -1
+; RV32IMZBS-NEXT: and a0, t4, s3
+; RV32IMZBS-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s3, s9, 16
+; RV32IMZBS-NEXT: seqz t6, s6
+; RV32IMZBS-NEXT: addi t6, t6, -1
+; RV32IMZBS-NEXT: and s0, t6, s5
+; RV32IMZBS-NEXT: slli s4, s9, 17
+; RV32IMZBS-NEXT: seqz t6, s7
+; RV32IMZBS-NEXT: addi t6, t6, -1
+; RV32IMZBS-NEXT: and s2, t6, s1
+; RV32IMZBS-NEXT: slli a0, s9, 18
+; RV32IMZBS-NEXT: seqz t6, s10
+; RV32IMZBS-NEXT: addi t6, t6, -1
+; RV32IMZBS-NEXT: and t4, t6, s8
+; RV32IMZBS-NEXT: slli a7, s9, 19
+; RV32IMZBS-NEXT: seqz t6, ra
+; RV32IMZBS-NEXT: addi t6, t6, -1
+; RV32IMZBS-NEXT: and t6, t6, s11
+; RV32IMZBS-NEXT: bexti s5, t1, 11
+; RV32IMZBS-NEXT: addi s5, s5, -1
+; RV32IMZBS-NEXT: and s6, s5, a6
+; RV32IMZBS-NEXT: bexti a6, t1, 12
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: and t0, a6, a5
+; RV32IMZBS-NEXT: bexti a5, t1, 13
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and s5, a5, a4
+; RV32IMZBS-NEXT: bexti a4, t1, 14
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and t2, a4, a3
+; RV32IMZBS-NEXT: bexti a3, t1, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and t3, a3, a2
+; RV32IMZBS-NEXT: bexti a2, t1, 16
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and ra, a2, s3
+; RV32IMZBS-NEXT: bexti a2, t1, 17
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a2, a2, s4
+; RV32IMZBS-NEXT: bexti a5, t1, 18
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and s11, a5, a0
+; RV32IMZBS-NEXT: bexti a5, t1, 19
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a3, a5, a7
+; RV32IMZBS-NEXT: bexti a5, t1, 20
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, s9, 20
+; RV32IMZBS-NEXT: and a4, a5, a6
+; RV32IMZBS-NEXT: bexti a5, t1, 21
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, s9, 21
+; RV32IMZBS-NEXT: and s10, a5, a6
+; RV32IMZBS-NEXT: bexti a5, t1, 22
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: slli a6, s9, 22
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: bexti a6, t1, 23
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: slli s3, s9, 23
+; RV32IMZBS-NEXT: and a6, a6, s3
+; RV32IMZBS-NEXT: bexti s3, t1, 24
+; RV32IMZBS-NEXT: addi s3, s3, -1
+; RV32IMZBS-NEXT: slli s4, s9, 24
+; RV32IMZBS-NEXT: and s3, s3, s4
+; RV32IMZBS-NEXT: bexti s4, t1, 25
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: slli a0, s9, 25
+; RV32IMZBS-NEXT: and a0, s4, a0
+; RV32IMZBS-NEXT: bexti s4, t1, 26
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: slli a7, s9, 26
+; RV32IMZBS-NEXT: and a7, s4, a7
+; RV32IMZBS-NEXT: bexti s4, t1, 27
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: slli s1, s9, 27
+; RV32IMZBS-NEXT: and s1, s4, s1
+; RV32IMZBS-NEXT: bexti s4, t1, 28
+; RV32IMZBS-NEXT: addi s4, s4, -1
+; RV32IMZBS-NEXT: slli s7, s9, 28
+; RV32IMZBS-NEXT: and s4, s4, s7
+; RV32IMZBS-NEXT: bexti s7, t1, 29
+; RV32IMZBS-NEXT: addi s7, s7, -1
+; RV32IMZBS-NEXT: slli s8, s9, 29
+; RV32IMZBS-NEXT: and s7, s7, s8
+; RV32IMZBS-NEXT: andi a1, a1, 1
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a1, a1, s9
+; RV32IMZBS-NEXT: slli s9, s9, 30
+; RV32IMZBS-NEXT: bexti t1, t1, 30
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: and t1, t1, s9
+; RV32IMZBS-NEXT: lw s8, 20(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, s8
+; RV32IMZBS-NEXT: lw s8, 16(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 12(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s8, s9
+; RV32IMZBS-NEXT: lw s9, 8(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, s9, t5
+; RV32IMZBS-NEXT: xor s0, s0, s2
+; RV32IMZBS-NEXT: xor t0, s6, t0
+; RV32IMZBS-NEXT: xor a2, ra, a2
; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: xor a0, a0, s3
-; RV32IMZBS-NEXT: xor a3, a1, a3
-; RV32IMZBS-NEXT: slli a1, a1, 24
-; RV32IMZBS-NEXT: xor a3, a3, a5
-; RV32IMZBS-NEXT: xor a0, a0, s9
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: xor a0, a0, a4
-; RV32IMZBS-NEXT: and a3, a2, s5
-; RV32IMZBS-NEXT: srli a4, a2, 8
-; RV32IMZBS-NEXT: xor a0, a2, a0
-; RV32IMZBS-NEXT: slli a3, a3, 8
-; RV32IMZBS-NEXT: and a2, a4, s5
-; RV32IMZBS-NEXT: srli a0, a0, 24
-; RV32IMZBS-NEXT: or a1, a1, a3
-; RV32IMZBS-NEXT: or a0, a2, a0
-; RV32IMZBS-NEXT: or a0, a1, a0
+; RV32IMZBS-NEXT: xor a6, s7, t1
+; RV32IMZBS-NEXT: xor a1, a1, s8
+; RV32IMZBS-NEXT: lw t1, 4(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t5, t1
+; RV32IMZBS-NEXT: xor t4, s0, t4
+; RV32IMZBS-NEXT: xor t0, t0, s5
+; RV32IMZBS-NEXT: xor a2, a2, s11
+; RV32IMZBS-NEXT: xor a5, a5, s3
+; RV32IMZBS-NEXT: lw t5, 24(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a6, t5
+; RV32IMZBS-NEXT: xor a1, a1, t1
+; RV32IMZBS-NEXT: xor t1, t4, t6
+; RV32IMZBS-NEXT: xor t0, t0, t2
+; RV32IMZBS-NEXT: xor a2, a2, a3
+; RV32IMZBS-NEXT: xor a0, a5, a0
+; RV32IMZBS-NEXT: xor a1, a1, t1
+; RV32IMZBS-NEXT: xor a3, t0, t3
+; RV32IMZBS-NEXT: xor a2, a2, a4
+; RV32IMZBS-NEXT: xor a0, a0, a7
+; RV32IMZBS-NEXT: xor a1, a1, a3
+; RV32IMZBS-NEXT: xor a2, a2, s10
+; RV32IMZBS-NEXT: xor a0, a0, s1
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: xor a0, a0, s4
+; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: xor a0, a0, a6
+; RV32IMZBS-NEXT: srli a1, a0, 8
+; RV32IMZBS-NEXT: srli a2, a0, 24
+; RV32IMZBS-NEXT: slli a3, a0, 24
+; RV32IMZBS-NEXT: lw a4, 40(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a0, a0, a4
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: slli a0, a0, 8
+; RV32IMZBS-NEXT: or a1, a1, a2
+; RV32IMZBS-NEXT: or a0, a3, a0
+; RV32IMZBS-NEXT: or a0, a0, a1
; RV32IMZBS-NEXT: srli a1, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, t3
-; RV32IMZBS-NEXT: and a1, a1, t3
+; RV32IMZBS-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: and a1, a1, a2
; RV32IMZBS-NEXT: slli a0, a0, 4
; RV32IMZBS-NEXT: or a0, a1, a0
; RV32IMZBS-NEXT: srli a1, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, t4
-; RV32IMZBS-NEXT: and a1, a1, t4
+; RV32IMZBS-NEXT: lw a2, 32(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: and a1, a1, a2
; RV32IMZBS-NEXT: slli a0, a0, 2
; RV32IMZBS-NEXT: or a0, a1, a0
; RV32IMZBS-NEXT: srli a1, a0, 1
-; RV32IMZBS-NEXT: and a0, a0, t5
-; RV32IMZBS-NEXT: and a1, a1, t5
+; RV32IMZBS-NEXT: lw a2, 28(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: and a1, a1, a2
; RV32IMZBS-NEXT: slli a0, a0, 1
; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: addi sp, sp, 144
+; RV32IMZBS-NEXT: lw ra, 92(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 88(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 84(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s2, 80(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s3, 76(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s4, 72(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s5, 68(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s6, 64(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 60(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 56(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 52(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s10, 48(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: addi sp, sp, 96
; RV32IMZBS-NEXT: ret
;
; RV64IMZBS-LABEL: clmulr_i32:
; RV64IMZBS: # %bb.0:
-; RV64IMZBS-NEXT: addi sp, sp, -256
-; RV64IMZBS-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s1, 232(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s2, 224(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s3, 216(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s4, 208(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s5, 200(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s6, 192(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s7, 184(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s8, 176(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s9, 168(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s10, 160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s11, 152(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: slli a0, a0, 32
-; RV64IMZBS-NEXT: andi a7, a1, 2
-; RV64IMZBS-NEXT: andi a2, a1, 1
-; RV64IMZBS-NEXT: sd a2, 144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a2, a1, 4
-; RV64IMZBS-NEXT: sd a2, 136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a2, a1, 8
-; RV64IMZBS-NEXT: sd a2, 128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a2, a1, 16
-; RV64IMZBS-NEXT: sd a2, 120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a2, a1, 32
-; RV64IMZBS-NEXT: sd a2, 112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti t0, zero, 11
-; RV64IMZBS-NEXT: lui t1, 1
-; RV64IMZBS-NEXT: lui t2, 2
-; RV64IMZBS-NEXT: lui t3, 4
-; RV64IMZBS-NEXT: lui t4, 8
-; RV64IMZBS-NEXT: lui t5, 16
-; RV64IMZBS-NEXT: lui t6, 32
-; RV64IMZBS-NEXT: lui s0, 64
-; RV64IMZBS-NEXT: lui s1, 128
-; RV64IMZBS-NEXT: lui s2, 256
-; RV64IMZBS-NEXT: lui s3, 512
-; RV64IMZBS-NEXT: lui s4, 1024
-; RV64IMZBS-NEXT: lui s5, 2048
-; RV64IMZBS-NEXT: lui s6, 4096
-; RV64IMZBS-NEXT: lui s7, 8192
-; RV64IMZBS-NEXT: lui s8, 16384
-; RV64IMZBS-NEXT: lui s9, 32768
-; RV64IMZBS-NEXT: lui s10, 65536
-; RV64IMZBS-NEXT: lui s11, 131072
-; RV64IMZBS-NEXT: lui ra, 262144
-; RV64IMZBS-NEXT: and t0, a1, t0
-; RV64IMZBS-NEXT: and a2, a1, t1
-; RV64IMZBS-NEXT: sd a2, 104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and t2, a1, t2
-; RV64IMZBS-NEXT: and t3, a1, t3
-; RV64IMZBS-NEXT: and a2, a1, t4
-; RV64IMZBS-NEXT: sd a2, 96(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, t5
-; RV64IMZBS-NEXT: sd a2, 88(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and t6, a1, t6
-; RV64IMZBS-NEXT: and s0, a1, s0
-; RV64IMZBS-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s1, a1, s1
-; RV64IMZBS-NEXT: and a2, a1, s2
-; RV64IMZBS-NEXT: sd a2, 72(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s3, a1, s3
-; RV64IMZBS-NEXT: and a2, a1, s4
-; RV64IMZBS-NEXT: sd a2, 64(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, s5
-; RV64IMZBS-NEXT: sd a2, 56(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s6, a1, s6
-; RV64IMZBS-NEXT: and a2, a1, s7
-; RV64IMZBS-NEXT: sd a2, 48(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, s8
-; RV64IMZBS-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, s9
-; RV64IMZBS-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, s10
-; RV64IMZBS-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, s11
-; RV64IMZBS-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a1, ra
-; RV64IMZBS-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a6, a1, 64
-; RV64IMZBS-NEXT: andi a2, a1, 128
-; RV64IMZBS-NEXT: andi a3, a1, 256
-; RV64IMZBS-NEXT: andi a4, a1, 512
-; RV64IMZBS-NEXT: andi a5, a1, 1024
-; RV64IMZBS-NEXT: srliw a1, a1, 31
-; RV64IMZBS-NEXT: srli a0, a0, 32
-; RV64IMZBS-NEXT: slli s9, a1, 31
-; RV64IMZBS-NEXT: mul s10, a0, a7
-; RV64IMZBS-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s8, a0, a1
-; RV64IMZBS-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s7, a0, a1
-; RV64IMZBS-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s4, a0, a1
-; RV64IMZBS-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s2, a0, a1
-; RV64IMZBS-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s0, a0, a1
-; RV64IMZBS-NEXT: mul a1, a0, a6
-; RV64IMZBS-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t5, a0, a2
-; RV64IMZBS-NEXT: mul t4, a0, a3
-; RV64IMZBS-NEXT: mul ra, a0, a4
-; RV64IMZBS-NEXT: mul a1, a0, a5
-; RV64IMZBS-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul t1, a0, t0
-; RV64IMZBS-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t0, a0, a1
-; RV64IMZBS-NEXT: mul s5, a0, t2
-; RV64IMZBS-NEXT: mul a1, a0, t3
-; RV64IMZBS-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a6, a0, a1
-; RV64IMZBS-NEXT: mul a5, a0, t6
-; RV64IMZBS-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, a0, a1
-; RV64IMZBS-NEXT: mul s1, a0, s1
-; RV64IMZBS-NEXT: ld a1, 72(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s11, a0, a1
-; RV64IMZBS-NEXT: mul a1, a0, s3
-; RV64IMZBS-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 64(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a4, a0, a1
-; RV64IMZBS-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a3, a0, a1
-; RV64IMZBS-NEXT: mul a7, a0, s6
-; RV64IMZBS-NEXT: ld a1, 48(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t2, a0, a1
-; RV64IMZBS-NEXT: ld a1, 40(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t6, a0, a1
-; RV64IMZBS-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s3, a0, a1
-; RV64IMZBS-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s6, a0, a1
-; RV64IMZBS-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, a0, a1
-; RV64IMZBS-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a0, a1
-; RV64IMZBS-NEXT: mul a0, a0, s9
-; RV64IMZBS-NEXT: xor s8, s8, s10
-; RV64IMZBS-NEXT: xor s4, s7, s4
-; RV64IMZBS-NEXT: xor s0, s2, s0
-; RV64IMZBS-NEXT: xor t4, t5, t4
-; RV64IMZBS-NEXT: xor t0, t1, t0
-; RV64IMZBS-NEXT: xor a5, a6, a5
-; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: xor a1, a2, a1
-; RV64IMZBS-NEXT: xor a2, s8, s4
-; RV64IMZBS-NEXT: ld a4, 128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, s0, a4
-; RV64IMZBS-NEXT: xor a6, t4, ra
-; RV64IMZBS-NEXT: xor t0, t0, s5
-; RV64IMZBS-NEXT: xor a5, a5, t3
-; RV64IMZBS-NEXT: xor a3, a3, a7
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: ld a4, 144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a6, a4
-; RV64IMZBS-NEXT: ld a6, 112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, t0, a6
-; RV64IMZBS-NEXT: xor a5, a5, s1
-; RV64IMZBS-NEXT: xor a3, a3, t2
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: ld a4, 136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a6, a4
-; RV64IMZBS-NEXT: xor a5, a5, s11
-; RV64IMZBS-NEXT: xor a3, a3, t6
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: ld a4, 120(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, -176
+; RV64IMZBS-NEXT: sd ra, 168(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s0, 160(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 152(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 144(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s3, 136(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s4, 128(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s5, 120(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s6, 112(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s7, 104(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s8, 96(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s9, 88(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s10, 80(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s11, 72(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: mv s3, a1
+; RV64IMZBS-NEXT: slli a2, a0, 32
+; RV64IMZBS-NEXT: andi a3, a1, 2
+; RV64IMZBS-NEXT: andi a4, a1, 1
+; RV64IMZBS-NEXT: andi a5, a1, 4
+; RV64IMZBS-NEXT: andi a6, a1, 8
+; RV64IMZBS-NEXT: andi a7, a1, 16
+; RV64IMZBS-NEXT: andi t0, a1, 32
+; RV64IMZBS-NEXT: andi t1, a1, 64
+; RV64IMZBS-NEXT: andi t2, a1, 128
+; RV64IMZBS-NEXT: andi t3, a1, 256
+; RV64IMZBS-NEXT: andi t4, a1, 512
+; RV64IMZBS-NEXT: andi t5, a1, 1024
+; RV64IMZBS-NEXT: not s2, a1
+; RV64IMZBS-NEXT: srli t6, a2, 32
+; RV64IMZBS-NEXT: srli s0, a2, 31
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: seqz a4, a4
+; RV64IMZBS-NEXT: srli a0, a2, 30
+; RV64IMZBS-NEXT: seqz a5, a5
+; RV64IMZBS-NEXT: srli s1, a2, 29
+; RV64IMZBS-NEXT: seqz a6, a6
+; RV64IMZBS-NEXT: srli a1, a2, 28
+; RV64IMZBS-NEXT: seqz a7, a7
+; RV64IMZBS-NEXT: srli s4, a2, 27
+; RV64IMZBS-NEXT: seqz t0, t0
+; RV64IMZBS-NEXT: srli s5, a2, 26
+; RV64IMZBS-NEXT: seqz s6, t1
+; RV64IMZBS-NEXT: srli s7, a2, 25
+; RV64IMZBS-NEXT: seqz t2, t2
+; RV64IMZBS-NEXT: srli s8, a2, 24
+; RV64IMZBS-NEXT: seqz t3, t3
+; RV64IMZBS-NEXT: srli s9, a2, 23
+; RV64IMZBS-NEXT: seqz t4, t4
+; RV64IMZBS-NEXT: srli s10, a2, 22
+; RV64IMZBS-NEXT: seqz s11, t5
+; RV64IMZBS-NEXT: srli ra, a2, 21
+; RV64IMZBS-NEXT: bexti t1, s2, 11
+; RV64IMZBS-NEXT: srli t5, a2, 20
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, s0
+; RV64IMZBS-NEXT: sd a3, 64(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti s0, s2, 12
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: and a3, a4, t6
+; RV64IMZBS-NEXT: sd a3, 56(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: srli a3, a2, 19
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and a0, a5, a0
+; RV64IMZBS-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, s2, 13
+; RV64IMZBS-NEXT: addi a6, a6, -1
+; RV64IMZBS-NEXT: and a4, a6, s1
+; RV64IMZBS-NEXT: sd a4, 40(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: srli a4, a2, 18
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: and a1, a7, a1
+; RV64IMZBS-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti t6, s2, 14
+; RV64IMZBS-NEXT: addi t0, t0, -1
+; RV64IMZBS-NEXT: and a1, t0, s4
+; RV64IMZBS-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: srli s4, a2, 17
+; RV64IMZBS-NEXT: addi s6, s6, -1
+; RV64IMZBS-NEXT: and a1, s6, s5
+; RV64IMZBS-NEXT: sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti s5, s2, 15
+; RV64IMZBS-NEXT: addi t2, t2, -1
+; RV64IMZBS-NEXT: and a1, t2, s7
+; RV64IMZBS-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: srli s6, a2, 16
+; RV64IMZBS-NEXT: addi t3, t3, -1
+; RV64IMZBS-NEXT: and a7, t3, s8
+; RV64IMZBS-NEXT: bexti s7, s2, 16
+; RV64IMZBS-NEXT: addi t4, t4, -1
+; RV64IMZBS-NEXT: and t3, t4, s9
+; RV64IMZBS-NEXT: srli s8, a2, 15
+; RV64IMZBS-NEXT: addi s11, s11, -1
+; RV64IMZBS-NEXT: and t4, s11, s10
+; RV64IMZBS-NEXT: bexti s9, s2, 17
+; RV64IMZBS-NEXT: addi t1, t1, -1
+; RV64IMZBS-NEXT: and t1, t1, ra
+; RV64IMZBS-NEXT: srli s10, a2, 14
+; RV64IMZBS-NEXT: addi s0, s0, -1
+; RV64IMZBS-NEXT: and a5, s0, t5
+; RV64IMZBS-NEXT: bexti t5, s2, 18
+; RV64IMZBS-NEXT: addi s0, a0, -1
+; RV64IMZBS-NEXT: and s0, s0, a3
+; RV64IMZBS-NEXT: srli a3, a2, 13
+; RV64IMZBS-NEXT: addi t6, t6, -1
+; RV64IMZBS-NEXT: and a4, t6, a4
+; RV64IMZBS-NEXT: bexti t6, s2, 19
+; RV64IMZBS-NEXT: addi s5, s5, -1
+; RV64IMZBS-NEXT: and a6, s5, s4
+; RV64IMZBS-NEXT: srli s11, a2, 12
+; RV64IMZBS-NEXT: addi s7, s7, -1
+; RV64IMZBS-NEXT: and s4, s7, s6
+; RV64IMZBS-NEXT: bexti ra, s2, 20
+; RV64IMZBS-NEXT: addi s9, s9, -1
+; RV64IMZBS-NEXT: and s7, s9, s8
+; RV64IMZBS-NEXT: srli s9, a2, 11
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: and s6, t5, s10
+; RV64IMZBS-NEXT: bexti t5, s2, 21
+; RV64IMZBS-NEXT: addi t6, t6, -1
+; RV64IMZBS-NEXT: and s5, t6, a3
+; RV64IMZBS-NEXT: srli a3, a2, 10
+; RV64IMZBS-NEXT: addi ra, ra, -1
+; RV64IMZBS-NEXT: and s8, ra, s11
+; RV64IMZBS-NEXT: bexti t6, s2, 22
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: and s9, t5, s9
+; RV64IMZBS-NEXT: srli t5, a2, 9
+; RV64IMZBS-NEXT: addi t6, t6, -1
+; RV64IMZBS-NEXT: and s10, t6, a3
+; RV64IMZBS-NEXT: bexti a3, s2, 23
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, t5
+; RV64IMZBS-NEXT: bexti t5, s2, 24
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: srli t6, a2, 8
+; RV64IMZBS-NEXT: and s11, t5, t6
+; RV64IMZBS-NEXT: bexti t5, s2, 25
+; RV64IMZBS-NEXT: addi t5, t5, -1
+; RV64IMZBS-NEXT: srli t6, a2, 7
+; RV64IMZBS-NEXT: and t5, t5, t6
+; RV64IMZBS-NEXT: bexti t6, s2, 26
+; RV64IMZBS-NEXT: addi t6, t6, -1
+; RV64IMZBS-NEXT: srli ra, a2, 6
+; RV64IMZBS-NEXT: and t6, t6, ra
+; RV64IMZBS-NEXT: bexti ra, s2, 27
+; RV64IMZBS-NEXT: addi ra, ra, -1
+; RV64IMZBS-NEXT: srli a0, a2, 5
+; RV64IMZBS-NEXT: and a0, ra, a0
+; RV64IMZBS-NEXT: bexti ra, s2, 28
+; RV64IMZBS-NEXT: addi ra, ra, -1
+; RV64IMZBS-NEXT: srli s1, a2, 4
+; RV64IMZBS-NEXT: and s1, ra, s1
+; RV64IMZBS-NEXT: bexti ra, s2, 29
+; RV64IMZBS-NEXT: addi ra, ra, -1
+; RV64IMZBS-NEXT: srli a1, a2, 3
+; RV64IMZBS-NEXT: and a1, ra, a1
+; RV64IMZBS-NEXT: bexti s2, s2, 30
+; RV64IMZBS-NEXT: addi s2, s2, -1
+; RV64IMZBS-NEXT: srli ra, a2, 2
+; RV64IMZBS-NEXT: and s2, s2, ra
+; RV64IMZBS-NEXT: sraiw s3, s3, 31
+; RV64IMZBS-NEXT: srli a2, a2, 1
+; RV64IMZBS-NEXT: seqz s3, s3
+; RV64IMZBS-NEXT: addi s3, s3, -1
+; RV64IMZBS-NEXT: and a2, s3, a2
+; RV64IMZBS-NEXT: ld s3, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s3, ra, s3
+; RV64IMZBS-NEXT: ld ra, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t0, 40(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor ra, ra, t0
+; RV64IMZBS-NEXT: ld t0, 32(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t2, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, t2
+; RV64IMZBS-NEXT: ld t2, 8(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t2, a7
+; RV64IMZBS-NEXT: xor a5, t1, a5
+; RV64IMZBS-NEXT: xor t1, s4, s7
+; RV64IMZBS-NEXT: xor a3, s10, a3
+; RV64IMZBS-NEXT: xor a1, a1, s2
+; RV64IMZBS-NEXT: xor t2, s3, ra
+; RV64IMZBS-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, s2
+; RV64IMZBS-NEXT: xor a7, a7, t3
+; RV64IMZBS-NEXT: xor a5, a5, s0
+; RV64IMZBS-NEXT: xor t1, t1, s6
+; RV64IMZBS-NEXT: xor a3, a3, s11
+; RV64IMZBS-NEXT: xor t0, t2, t0
+; RV64IMZBS-NEXT: xor a7, a7, t4
; RV64IMZBS-NEXT: xor a4, a5, a4
-; RV64IMZBS-NEXT: xor a3, a3, s3
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: xor a3, a3, s6
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: xor a0, a1, a0
-; RV64IMZBS-NEXT: xor a0, a2, a0
+; RV64IMZBS-NEXT: xor a5, t1, s5
+; RV64IMZBS-NEXT: xor a3, a3, t5
+; RV64IMZBS-NEXT: xor a7, t0, a7
+; RV64IMZBS-NEXT: xor a4, a4, a6
+; RV64IMZBS-NEXT: xor a5, a5, s8
+; RV64IMZBS-NEXT: xor a3, a3, t6
+; RV64IMZBS-NEXT: xor a4, a7, a4
+; RV64IMZBS-NEXT: xor a5, a5, s9
+; RV64IMZBS-NEXT: xor a0, a3, a0
+; RV64IMZBS-NEXT: xor a4, a4, a5
+; RV64IMZBS-NEXT: xor a0, a0, s1
+; RV64IMZBS-NEXT: xor a0, a4, a0
+; RV64IMZBS-NEXT: xor a1, a1, a2
+; RV64IMZBS-NEXT: xor a0, a0, a1
; RV64IMZBS-NEXT: srli a0, a0, 31
-; RV64IMZBS-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 232(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s2, 224(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s3, 216(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s4, 208(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s5, 200(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s6, 192(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s7, 184(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s8, 176(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s9, 168(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s10, 160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 152(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: addi sp, sp, 256
+; RV64IMZBS-NEXT: ld ra, 168(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 160(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 152(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 144(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 136(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s4, 128(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s5, 120(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s6, 112(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s7, 104(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s8, 96(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s9, 88(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s10, 80(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s11, 72(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 176
; RV64IMZBS-NEXT: ret
%a.ext = zext i32 %a to i64
%b.ext = zext i32 %b to i64
@@ -3677,522 +2353,59 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind {
declare i8 @use(i8, i1)
define void @commutative_clmulr_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
-; RV32I-LABEL: commutative_clmulr_i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: zext.b s2, a1
-; RV32I-NEXT: andi a1, a0, 2
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, a0, s4
-; RV32I-NEXT: andi a1, s3, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s3, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s3, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s4, s5, a0
-; RV32I-NEXT: andi a1, s3, 128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s3, s3, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s3, a0
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: srli a0, a0, 7
-; RV32I-NEXT: sb a0, 0(s1)
-; RV32I-NEXT: sb a0, 0(s0)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: commutative_clmulr_i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -64
-; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a3
-; RV64I-NEXT: mv s1, a2
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: zext.b s2, a1
-; RV64I-NEXT: andi a1, a0, 2
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, a0, s4
-; RV64I-NEXT: andi a1, s3, 4
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: andi a1, s3, 16
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 32
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: andi a1, s3, 64
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s4, s5, a0
-; RV64I-NEXT: andi a1, s3, 128
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: srli a0, a0, 7
-; RV64I-NEXT: sb a0, 0(s1)
-; RV64I-NEXT: sb a0, 0(s0)
-; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 64
-; RV64I-NEXT: ret
-;
-; CHECK-M-LABEL: commutative_clmulr_i8:
-; CHECK-M: # %bb.0:
-; CHECK-M-NEXT: zext.b a1, a1
-; CHECK-M-NEXT: andi a4, a0, 2
-; CHECK-M-NEXT: andi a5, a0, 1
-; CHECK-M-NEXT: andi a6, a0, 4
-; CHECK-M-NEXT: andi a7, a0, 8
-; CHECK-M-NEXT: andi t0, a0, 16
-; CHECK-M-NEXT: andi t1, a0, 32
-; CHECK-M-NEXT: andi t2, a0, 64
-; CHECK-M-NEXT: andi a0, a0, 128
-; CHECK-M-NEXT: mul a4, a1, a4
-; CHECK-M-NEXT: mul a5, a1, a5
-; CHECK-M-NEXT: mul a6, a1, a6
-; CHECK-M-NEXT: mul a7, a1, a7
-; CHECK-M-NEXT: mul t0, a1, t0
-; CHECK-M-NEXT: mul t1, a1, t1
-; CHECK-M-NEXT: mul t2, a1, t2
-; CHECK-M-NEXT: mul a0, a1, a0
-; CHECK-M-NEXT: xor a4, a5, a4
-; CHECK-M-NEXT: xor a1, a6, a7
-; CHECK-M-NEXT: xor a5, t0, t1
-; CHECK-M-NEXT: xor a1, a4, a1
-; CHECK-M-NEXT: xor a4, a5, t2
-; CHECK-M-NEXT: xor a1, a1, a4
-; CHECK-M-NEXT: xor a0, a1, a0
-; CHECK-M-NEXT: srli a0, a0, 7
-; CHECK-M-NEXT: sb a0, 0(a2)
-; CHECK-M-NEXT: sb a0, 0(a3)
-; CHECK-M-NEXT: ret
-;
-; CHECK-ZBS-LABEL: commutative_clmulr_i8:
-; CHECK-ZBS: # %bb.0:
-; CHECK-ZBS-NEXT: zext.b a1, a1
-; CHECK-ZBS-NEXT: andi a4, a0, 2
-; CHECK-ZBS-NEXT: andi a5, a0, 1
-; CHECK-ZBS-NEXT: andi a6, a0, 4
-; CHECK-ZBS-NEXT: andi a7, a0, 8
-; CHECK-ZBS-NEXT: andi t0, a0, 16
-; CHECK-ZBS-NEXT: andi t1, a0, 32
-; CHECK-ZBS-NEXT: andi t2, a0, 64
-; CHECK-ZBS-NEXT: andi a0, a0, 128
-; CHECK-ZBS-NEXT: mul a4, a1, a4
-; CHECK-ZBS-NEXT: mul a5, a1, a5
-; CHECK-ZBS-NEXT: mul a6, a1, a6
-; CHECK-ZBS-NEXT: mul a7, a1, a7
-; CHECK-ZBS-NEXT: mul t0, a1, t0
-; CHECK-ZBS-NEXT: mul t1, a1, t1
-; CHECK-ZBS-NEXT: mul t2, a1, t2
-; CHECK-ZBS-NEXT: mul a0, a1, a0
-; CHECK-ZBS-NEXT: xor a4, a5, a4
-; CHECK-ZBS-NEXT: xor a1, a6, a7
-; CHECK-ZBS-NEXT: xor a5, t0, t1
-; CHECK-ZBS-NEXT: xor a1, a4, a1
-; CHECK-ZBS-NEXT: xor a4, a5, t2
-; CHECK-ZBS-NEXT: xor a1, a1, a4
-; CHECK-ZBS-NEXT: xor a0, a1, a0
-; CHECK-ZBS-NEXT: srli a0, a0, 7
-; CHECK-ZBS-NEXT: sb a0, 0(a2)
-; CHECK-ZBS-NEXT: sb a0, 0(a3)
-; CHECK-ZBS-NEXT: ret
+; CHECK-LABEL: commutative_clmulr_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: zext.b a1, a1
+; CHECK-NEXT: andi a4, a0, 2
+; CHECK-NEXT: andi a5, a0, 4
+; CHECK-NEXT: andi a6, a0, 8
+; CHECK-NEXT: andi a7, a0, 16
+; CHECK-NEXT: andi t0, a0, 32
+; CHECK-NEXT: andi t1, a0, 64
+; CHECK-NEXT: slli t2, a1, 1
+; CHECK-NEXT: seqz a4, a4
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a4, a4, t2
+; CHECK-NEXT: slli t2, a1, 2
+; CHECK-NEXT: seqz a5, a5
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a5, a5, t2
+; CHECK-NEXT: slli t2, a1, 3
+; CHECK-NEXT: seqz a6, a6
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a6, a6, t2
+; CHECK-NEXT: slli t2, a1, 4
+; CHECK-NEXT: seqz a7, a7
+; CHECK-NEXT: addi a7, a7, -1
+; CHECK-NEXT: and a7, a7, t2
+; CHECK-NEXT: slli t2, a1, 5
+; CHECK-NEXT: seqz t0, t0
+; CHECK-NEXT: addi t0, t0, -1
+; CHECK-NEXT: and t0, t0, t2
+; CHECK-NEXT: slli t2, a1, 6
+; CHECK-NEXT: seqz t1, t1
+; CHECK-NEXT: addi t1, t1, -1
+; CHECK-NEXT: and t1, t1, t2
+; CHECK-NEXT: andi t2, a0, 1
+; CHECK-NEXT: andi a0, a0, 128
+; CHECK-NEXT: seqz t2, t2
+; CHECK-NEXT: addi t2, t2, -1
+; CHECK-NEXT: and t2, t2, a1
+; CHECK-NEXT: slli a1, a1, 7
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: xor a1, t2, a4
+; CHECK-NEXT: xor a4, a5, a6
+; CHECK-NEXT: xor a5, a7, t0
+; CHECK-NEXT: xor a1, a1, a4
+; CHECK-NEXT: xor a4, a5, t1
+; CHECK-NEXT: xor a1, a1, a4
+; CHECK-NEXT: xor a0, a1, a0
+; CHECK-NEXT: srli a0, a0, 7
+; CHECK-NEXT: sb a0, 0(a2)
+; CHECK-NEXT: sb a0, 0(a3)
+; CHECK-NEXT: ret
%x.ext = zext i8 %x to i16
%y.ext = zext i8 %y to i16
%clmul_xy = call i16 @llvm.clmul.i16(i16 %x.ext, i16 %y.ext)
@@ -4209,124 +2422,126 @@ define void @commutative_clmulr_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; RV32I-LABEL: mul_use_commutative_clmul_i8:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi a1, a1, 2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s5, a0, s4
-; RV32I-NEXT: andi a1, s3, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s5, s5, a0
-; RV32I-NEXT: andi a1, s3, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: andi a1, s3, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s4, s4, a0
-; RV32I-NEXT: andi a1, s3, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s4, a0
-; RV32I-NEXT: xor s4, s5, a0
-; RV32I-NEXT: andi a1, s3, -128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s4, a0
-; RV32I-NEXT: sb s2, 0(s1)
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: slli a3, a1, 30
+; RV32I-NEXT: slli a4, a0, 1
+; RV32I-NEXT: slli a5, a1, 29
+; RV32I-NEXT: slli a6, a0, 2
+; RV32I-NEXT: slli a7, a1, 28
+; RV32I-NEXT: slli t0, a0, 3
+; RV32I-NEXT: slli t1, a1, 27
+; RV32I-NEXT: srli a3, a3, 31
+; RV32I-NEXT: neg a3, a3
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: slli a4, a0, 4
+; RV32I-NEXT: srli a5, a5, 31
+; RV32I-NEXT: neg a5, a5
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: slli a6, a1, 26
+; RV32I-NEXT: srli a7, a7, 31
+; RV32I-NEXT: neg a7, a7
+; RV32I-NEXT: and a7, a7, t0
+; RV32I-NEXT: slli t0, a0, 5
+; RV32I-NEXT: srli t1, t1, 31
+; RV32I-NEXT: neg t1, t1
+; RV32I-NEXT: and a4, t1, a4
+; RV32I-NEXT: slli t1, a1, 25
+; RV32I-NEXT: srli a6, a6, 31
+; RV32I-NEXT: neg a6, a6
+; RV32I-NEXT: and a6, a6, t0
+; RV32I-NEXT: slli t0, a0, 6
+; RV32I-NEXT: srli t1, t1, 31
+; RV32I-NEXT: neg t1, t1
+; RV32I-NEXT: and t0, t1, t0
+; RV32I-NEXT: slli t1, a1, 31
+; RV32I-NEXT: srai t1, t1, 31
+; RV32I-NEXT: and t1, t1, a0
+; RV32I-NEXT: xor a3, t1, a3
+; RV32I-NEXT: xor a5, a5, a7
+; RV32I-NEXT: xor a4, a4, a6
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: slli a0, a0, 7
+; RV32I-NEXT: srli a1, a1, 31
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: xor a3, a3, a5
+; RV32I-NEXT: xor a4, a4, t0
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: xor s1, a3, a0
+; RV32I-NEXT: sb s1, 0(a2)
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call use
-; RV32I-NEXT: sb s2, 0(s0)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: sb s1, 0(s0)
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: mul_use_commutative_clmul_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -64
-; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a3
-; RV64I-NEXT: mv s1, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: andi a1, a1, 2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, a0, s4
-; RV64I-NEXT: andi a1, s3, 4
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: andi a1, s3, 16
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s4, a0
-; RV64I-NEXT: andi a1, s3, 32
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s4, s4, a0
-; RV64I-NEXT: andi a1, s3, 64
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s4, a0
-; RV64I-NEXT: xor s4, s5, a0
-; RV64I-NEXT: andi a1, s3, -128
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s2, s4, a0
-; RV64I-NEXT: sb s2, 0(s1)
-; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: slli a3, a1, 62
+; RV64I-NEXT: slli a4, a0, 1
+; RV64I-NEXT: slli a5, a1, 61
+; RV64I-NEXT: slli a6, a0, 2
+; RV64I-NEXT: slli a7, a1, 60
+; RV64I-NEXT: slli t0, a0, 3
+; RV64I-NEXT: slli t1, a1, 59
+; RV64I-NEXT: srli a3, a3, 63
+; RV64I-NEXT: neg a3, a3
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: srli a5, a5, 63
+; RV64I-NEXT: neg a5, a5
+; RV64I-NEXT: and a5, a5, a6
+; RV64I-NEXT: slli a6, a1, 58
+; RV64I-NEXT: srli a7, a7, 63
+; RV64I-NEXT: neg a7, a7
+; RV64I-NEXT: and a7, a7, t0
+; RV64I-NEXT: slli t0, a0, 5
+; RV64I-NEXT: srli t1, t1, 63
+; RV64I-NEXT: neg t1, t1
+; RV64I-NEXT: and a4, t1, a4
+; RV64I-NEXT: slli t1, a1, 57
+; RV64I-NEXT: srli a6, a6, 63
+; RV64I-NEXT: neg a6, a6
+; RV64I-NEXT: and a6, a6, t0
+; RV64I-NEXT: slli t0, a0, 6
+; RV64I-NEXT: srli t1, t1, 63
+; RV64I-NEXT: neg t1, t1
+; RV64I-NEXT: and t0, t1, t0
+; RV64I-NEXT: slli t1, a1, 63
+; RV64I-NEXT: srai t1, t1, 63
+; RV64I-NEXT: and t1, t1, a0
+; RV64I-NEXT: xor a3, t1, a3
+; RV64I-NEXT: xor a5, a5, a7
+; RV64I-NEXT: xor a4, a4, a6
+; RV64I-NEXT: slli a1, a1, 56
+; RV64I-NEXT: slli a0, a0, 7
+; RV64I-NEXT: srli a1, a1, 63
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: xor a3, a3, a5
+; RV64I-NEXT: xor a4, a4, t0
+; RV64I-NEXT: xor a3, a3, a4
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: xor s1, a3, a0
+; RV64I-NEXT: sb s1, 0(a2)
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call use
-; RV64I-NEXT: sb s2, 0(s0)
-; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: sb s1, 0(s0)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32IM-LABEL: mul_use_commutative_clmul_i8:
@@ -4336,29 +2551,51 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
; RV32IM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IM-NEXT: mv s0, a3
-; RV32IM-NEXT: andi a3, a1, 2
-; RV32IM-NEXT: andi a4, a1, 1
-; RV32IM-NEXT: andi a5, a1, 4
-; RV32IM-NEXT: andi a6, a1, 8
-; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: xor a3, a4, a3
-; RV32IM-NEXT: andi a4, a1, 16
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: mul a6, a0, a6
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: andi a6, a1, 32
-; RV32IM-NEXT: mul a4, a0, a4
-; RV32IM-NEXT: mul a6, a0, a6
+; RV32IM-NEXT: slli a3, a1, 30
+; RV32IM-NEXT: slli a4, a0, 1
+; RV32IM-NEXT: slli a5, a1, 29
+; RV32IM-NEXT: slli a6, a0, 2
+; RV32IM-NEXT: slli a7, a1, 28
+; RV32IM-NEXT: slli t0, a0, 3
+; RV32IM-NEXT: slli t1, a1, 27
+; RV32IM-NEXT: srli a3, a3, 31
+; RV32IM-NEXT: neg a3, a3
+; RV32IM-NEXT: and a3, a3, a4
+; RV32IM-NEXT: slli a4, a0, 4
+; RV32IM-NEXT: srli a5, a5, 31
+; RV32IM-NEXT: neg a5, a5
+; RV32IM-NEXT: and a5, a5, a6
+; RV32IM-NEXT: slli a6, a1, 26
+; RV32IM-NEXT: srli a7, a7, 31
+; RV32IM-NEXT: neg a7, a7
+; RV32IM-NEXT: and a7, a7, t0
+; RV32IM-NEXT: slli t0, a0, 5
+; RV32IM-NEXT: srli t1, t1, 31
+; RV32IM-NEXT: neg t1, t1
+; RV32IM-NEXT: and a4, t1, a4
+; RV32IM-NEXT: slli t1, a1, 25
+; RV32IM-NEXT: srli a6, a6, 31
+; RV32IM-NEXT: neg a6, a6
+; RV32IM-NEXT: and a6, a6, t0
+; RV32IM-NEXT: slli t0, a0, 6
+; RV32IM-NEXT: srli t1, t1, 31
+; RV32IM-NEXT: neg t1, t1
+; RV32IM-NEXT: and t0, t1, t0
+; RV32IM-NEXT: slli t1, a1, 31
+; RV32IM-NEXT: srai t1, t1, 31
+; RV32IM-NEXT: and t1, t1, a0
+; RV32IM-NEXT: xor a3, t1, a3
+; RV32IM-NEXT: xor a5, a5, a7
; RV32IM-NEXT: xor a4, a4, a6
+; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: slli a0, a0, 7
+; RV32IM-NEXT: srli a1, a1, 31
+; RV32IM-NEXT: neg a1, a1
; RV32IM-NEXT: xor a3, a3, a5
-; RV32IM-NEXT: andi a5, a1, 64
-; RV32IM-NEXT: andi a1, a1, -128
-; RV32IM-NEXT: mul a5, a0, a5
-; RV32IM-NEXT: xor a4, a4, a5
+; RV32IM-NEXT: xor a4, a4, t0
; RV32IM-NEXT: xor a3, a3, a4
-; RV32IM-NEXT: mul s1, a0, a1
-; RV32IM-NEXT: xor s1, a3, s1
+; RV32IM-NEXT: and a0, a1, a0
+; RV32IM-NEXT: xor s1, a3, a0
; RV32IM-NEXT: sb s1, 0(a2)
; RV32IM-NEXT: mv a0, s1
; RV32IM-NEXT: call use
@@ -4376,29 +2613,51 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
; RV64IM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64IM-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64IM-NEXT: mv s0, a3
-; RV64IM-NEXT: andi a3, a1, 2
-; RV64IM-NEXT: andi a4, a1, 1
-; RV64IM-NEXT: andi a5, a1, 4
-; RV64IM-NEXT: andi a6, a1, 8
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: xor a3, a4, a3
-; RV64IM-NEXT: andi a4, a1, 16
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: mul a6, a0, a6
-; RV64IM-NEXT: xor a5, a5, a6
-; RV64IM-NEXT: andi a6, a1, 32
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: slli a3, a1, 62
+; RV64IM-NEXT: slli a4, a0, 1
+; RV64IM-NEXT: slli a5, a1, 61
+; RV64IM-NEXT: slli a6, a0, 2
+; RV64IM-NEXT: slli a7, a1, 60
+; RV64IM-NEXT: slli t0, a0, 3
+; RV64IM-NEXT: slli t1, a1, 59
+; RV64IM-NEXT: srli a3, a3, 63
+; RV64IM-NEXT: neg a3, a3
+; RV64IM-NEXT: and a3, a3, a4
+; RV64IM-NEXT: slli a4, a0, 4
+; RV64IM-NEXT: srli a5, a5, 63
+; RV64IM-NEXT: neg a5, a5
+; RV64IM-NEXT: and a5, a5, a6
+; RV64IM-NEXT: slli a6, a1, 58
+; RV64IM-NEXT: srli a7, a7, 63
+; RV64IM-NEXT: neg a7, a7
+; RV64IM-NEXT: and a7, a7, t0
+; RV64IM-NEXT: slli t0, a0, 5
+; RV64IM-NEXT: srli t1, t1, 63
+; RV64IM-NEXT: neg t1, t1
+; RV64IM-NEXT: and a4, t1, a4
+; RV64IM-NEXT: slli t1, a1, 57
+; RV64IM-NEXT: srli a6, a6, 63
+; RV64IM-NEXT: neg a6, a6
+; RV64IM-NEXT: and a6, a6, t0
+; RV64IM-NEXT: slli t0, a0, 6
+; RV64IM-NEXT: srli t1, t1, 63
+; RV64IM-NEXT: neg t1, t1
+; RV64IM-NEXT: and t0, t1, t0
+; RV64IM-NEXT: slli t1, a1, 63
+; RV64IM-NEXT: srai t1, t1, 63
+; RV64IM-NEXT: and t1, t1, a0
+; RV64IM-NEXT: xor a3, t1, a3
+; RV64IM-NEXT: xor a5, a5, a7
; RV64IM-NEXT: xor a4, a4, a6
+; RV64IM-NEXT: slli a1, a1, 56
+; RV64IM-NEXT: slli a0, a0, 7
+; RV64IM-NEXT: srli a1, a1, 63
+; RV64IM-NEXT: neg a1, a1
; RV64IM-NEXT: xor a3, a3, a5
-; RV64IM-NEXT: andi a5, a1, 64
-; RV64IM-NEXT: andi a1, a1, -128
-; RV64IM-NEXT: mul a5, a0, a5
-; RV64IM-NEXT: xor a4, a4, a5
+; RV64IM-NEXT: xor a4, a4, t0
; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: mul s1, a0, a1
-; RV64IM-NEXT: xor s1, a3, s1
+; RV64IM-NEXT: and a0, a1, a0
+; RV64IM-NEXT: xor s1, a3, a0
; RV64IM-NEXT: sb s1, 0(a2)
; RV64IM-NEXT: mv a0, s1
; RV64IM-NEXT: call use
@@ -4416,29 +2675,44 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
; RV32IMZBS-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: mv s0, a3
-; RV32IMZBS-NEXT: andi a3, a1, 2
-; RV32IMZBS-NEXT: andi a4, a1, 1
-; RV32IMZBS-NEXT: andi a5, a1, 4
-; RV32IMZBS-NEXT: andi a6, a1, 8
-; RV32IMZBS-NEXT: mul a3, a0, a3
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: andi a4, a1, 16
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: mul a6, a0, a6
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: andi a6, a1, 32
-; RV32IMZBS-NEXT: mul a4, a0, a4
-; RV32IMZBS-NEXT: mul a6, a0, a6
+; RV32IMZBS-NEXT: bexti a3, a1, 1
+; RV32IMZBS-NEXT: slli a4, a0, 1
+; RV32IMZBS-NEXT: bexti a5, a1, 2
+; RV32IMZBS-NEXT: slli a6, a0, 2
+; RV32IMZBS-NEXT: bexti a7, a1, 3
+; RV32IMZBS-NEXT: slli t0, a0, 3
+; RV32IMZBS-NEXT: bexti t1, a1, 4
+; RV32IMZBS-NEXT: neg a3, a3
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: slli a4, a0, 4
+; RV32IMZBS-NEXT: neg a5, a5
+; RV32IMZBS-NEXT: and a5, a5, a6
+; RV32IMZBS-NEXT: bexti a6, a1, 5
+; RV32IMZBS-NEXT: neg a7, a7
+; RV32IMZBS-NEXT: and a7, a7, t0
+; RV32IMZBS-NEXT: slli t0, a0, 5
+; RV32IMZBS-NEXT: neg t1, t1
+; RV32IMZBS-NEXT: and a4, t1, a4
+; RV32IMZBS-NEXT: bexti t1, a1, 6
+; RV32IMZBS-NEXT: neg a6, a6
+; RV32IMZBS-NEXT: and a6, a6, t0
+; RV32IMZBS-NEXT: slli t0, a0, 6
+; RV32IMZBS-NEXT: neg t1, t1
+; RV32IMZBS-NEXT: and t0, t1, t0
+; RV32IMZBS-NEXT: slli t1, a1, 31
+; RV32IMZBS-NEXT: srai t1, t1, 31
+; RV32IMZBS-NEXT: and t1, t1, a0
+; RV32IMZBS-NEXT: xor a3, t1, a3
+; RV32IMZBS-NEXT: xor a5, a5, a7
; RV32IMZBS-NEXT: xor a4, a4, a6
+; RV32IMZBS-NEXT: bexti a1, a1, 7
+; RV32IMZBS-NEXT: slli a0, a0, 7
+; RV32IMZBS-NEXT: neg a1, a1
; RV32IMZBS-NEXT: xor a3, a3, a5
-; RV32IMZBS-NEXT: andi a5, a1, 64
-; RV32IMZBS-NEXT: andi a1, a1, -128
-; RV32IMZBS-NEXT: mul a5, a0, a5
-; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: xor a4, a4, t0
; RV32IMZBS-NEXT: xor a3, a3, a4
-; RV32IMZBS-NEXT: mul s1, a0, a1
-; RV32IMZBS-NEXT: xor s1, a3, s1
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: xor s1, a3, a0
; RV32IMZBS-NEXT: sb s1, 0(a2)
; RV32IMZBS-NEXT: mv a0, s1
; RV32IMZBS-NEXT: call use
@@ -4456,29 +2730,44 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
; RV64IMZBS-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64IMZBS-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64IMZBS-NEXT: mv s0, a3
-; RV64IMZBS-NEXT: andi a3, a1, 2
-; RV64IMZBS-NEXT: andi a4, a1, 1
-; RV64IMZBS-NEXT: andi a5, a1, 4
-; RV64IMZBS-NEXT: andi a6, a1, 8
-; RV64IMZBS-NEXT: mul a3, a0, a3
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: xor a3, a4, a3
-; RV64IMZBS-NEXT: andi a4, a1, 16
-; RV64IMZBS-NEXT: mul a5, a0, a5
-; RV64IMZBS-NEXT: mul a6, a0, a6
-; RV64IMZBS-NEXT: xor a5, a5, a6
-; RV64IMZBS-NEXT: andi a6, a1, 32
-; RV64IMZBS-NEXT: mul a4, a0, a4
-; RV64IMZBS-NEXT: mul a6, a0, a6
+; RV64IMZBS-NEXT: bexti a3, a1, 1
+; RV64IMZBS-NEXT: slli a4, a0, 1
+; RV64IMZBS-NEXT: bexti a5, a1, 2
+; RV64IMZBS-NEXT: slli a6, a0, 2
+; RV64IMZBS-NEXT: bexti a7, a1, 3
+; RV64IMZBS-NEXT: slli t0, a0, 3
+; RV64IMZBS-NEXT: bexti t1, a1, 4
+; RV64IMZBS-NEXT: neg a3, a3
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: slli a4, a0, 4
+; RV64IMZBS-NEXT: neg a5, a5
+; RV64IMZBS-NEXT: and a5, a5, a6
+; RV64IMZBS-NEXT: bexti a6, a1, 5
+; RV64IMZBS-NEXT: neg a7, a7
+; RV64IMZBS-NEXT: and a7, a7, t0
+; RV64IMZBS-NEXT: slli t0, a0, 5
+; RV64IMZBS-NEXT: neg t1, t1
+; RV64IMZBS-NEXT: and a4, t1, a4
+; RV64IMZBS-NEXT: bexti t1, a1, 6
+; RV64IMZBS-NEXT: neg a6, a6
+; RV64IMZBS-NEXT: and a6, a6, t0
+; RV64IMZBS-NEXT: slli t0, a0, 6
+; RV64IMZBS-NEXT: neg t1, t1
+; RV64IMZBS-NEXT: and t0, t1, t0
+; RV64IMZBS-NEXT: slli t1, a1, 63
+; RV64IMZBS-NEXT: srai t1, t1, 63
+; RV64IMZBS-NEXT: and t1, t1, a0
+; RV64IMZBS-NEXT: xor a3, t1, a3
+; RV64IMZBS-NEXT: xor a5, a5, a7
; RV64IMZBS-NEXT: xor a4, a4, a6
+; RV64IMZBS-NEXT: bexti a1, a1, 7
+; RV64IMZBS-NEXT: slli a0, a0, 7
+; RV64IMZBS-NEXT: neg a1, a1
; RV64IMZBS-NEXT: xor a3, a3, a5
-; RV64IMZBS-NEXT: andi a5, a1, 64
-; RV64IMZBS-NEXT: andi a1, a1, -128
-; RV64IMZBS-NEXT: mul a5, a0, a5
-; RV64IMZBS-NEXT: xor a4, a4, a5
+; RV64IMZBS-NEXT: xor a4, a4, t0
; RV64IMZBS-NEXT: xor a3, a3, a4
-; RV64IMZBS-NEXT: mul s1, a0, a1
-; RV64IMZBS-NEXT: xor s1, a3, s1
+; RV64IMZBS-NEXT: and a0, a1, a0
+; RV64IMZBS-NEXT: xor s1, a3, a0
; RV64IMZBS-NEXT: sb s1, 0(a2)
; RV64IMZBS-NEXT: mv a0, s1
; RV64IMZBS-NEXT: call use
@@ -4499,6111 +2788,6933 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
define void @commutative_clmulr_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %p1) nounwind {
; RV32I-LABEL: commutative_clmulr_v2i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -336
-; RV32I-NEXT: sw ra, 332(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 328(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 324(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 320(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 316(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 312(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 308(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 304(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 300(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 296(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s9, 292(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s10, 288(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s11, 284(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a3, 276(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a2, 272(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s10, 0(a0)
-; RV32I-NEXT: lw a2, 4(a0)
-; RV32I-NEXT: lw a3, 8(a0)
-; RV32I-NEXT: sw a3, 268(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -800
+; RV32I-NEXT: sw ra, 796(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 792(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 788(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 784(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 780(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 776(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 772(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 768(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 764(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s8, 760(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s9, 756(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s10, 752(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s11, 748(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a3, 736(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a2, 732(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a2, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
+; RV32I-NEXT: lw t0, 8(a0)
; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw s7, 0(a1)
-; RV32I-NEXT: lw a0, 4(a1)
-; RV32I-NEXT: lw a3, 8(a1)
-; RV32I-NEXT: sw a3, 136(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t2, 4(a1)
+; RV32I-NEXT: lw t1, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lui a4, 16
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: lui a3, 209715
-; RV32I-NEXT: lui a5, 349525
-; RV32I-NEXT: addi s6, a4, -256
-; RV32I-NEXT: addi s5, a1, -241
-; RV32I-NEXT: addi s4, a3, 819
-; RV32I-NEXT: addi s3, a5, 1365
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: and a4, a0, s6
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srli a5, a2, 8
-; RV32I-NEXT: srli a6, a2, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: and a3, a2, s6
-; RV32I-NEXT: slli a2, a2, 24
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: and a5, a5, s6
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a0, a0, a4
-; RV32I-NEXT: or a4, a5, a6
-; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: or a2, a2, a4
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: srli a3, a2, 4
-; RV32I-NEXT: and a2, a2, s5
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: and a3, a3, s5
-; RV32I-NEXT: slli a2, a2, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: srli a3, a2, 2
-; RV32I-NEXT: and a2, a2, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, s4
-; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: srli a3, a2, 1
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, s3
-; RV32I-NEXT: slli a2, a2, 1
-; RV32I-NEXT: or s2, a1, a0
-; RV32I-NEXT: or s9, a3, a2
-; RV32I-NEXT: srli a0, s2, 8
-; RV32I-NEXT: srli a1, s2, 24
-; RV32I-NEXT: slli a2, s2, 24
-; RV32I-NEXT: and a3, s2, s6
-; RV32I-NEXT: srli a4, s9, 8
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, s9, 24
+; RV32I-NEXT: lui a7, 16
+; RV32I-NEXT: lui a3, 61681
+; RV32I-NEXT: lui a5, 209715
+; RV32I-NEXT: lui a6, 349525
+; RV32I-NEXT: addi ra, a7, -256
+; RV32I-NEXT: addi s11, a3, -241
+; RV32I-NEXT: addi s10, a5, 819
+; RV32I-NEXT: addi s9, a6, 1365
+; RV32I-NEXT: srli a7, t2, 8
+; RV32I-NEXT: srli t4, t2, 24
+; RV32I-NEXT: and a3, t2, ra
+; RV32I-NEXT: slli t2, t2, 24
+; RV32I-NEXT: srli a5, a4, 8
+; RV32I-NEXT: srli s3, a4, 24
+; RV32I-NEXT: and t3, a4, ra
+; RV32I-NEXT: slli a4, a4, 24
+; RV32I-NEXT: srli t5, s7, 8
+; RV32I-NEXT: srli s6, s7, 24
+; RV32I-NEXT: and s1, s7, ra
+; RV32I-NEXT: slli s7, s7, 24
+; RV32I-NEXT: srli s8, a2, 8
+; RV32I-NEXT: srli t6, a2, 24
+; RV32I-NEXT: and s0, a2, ra
+; RV32I-NEXT: slli a6, a2, 24
+; RV32I-NEXT: and a2, a7, ra
+; RV32I-NEXT: or a2, a2, t4
+; RV32I-NEXT: srli s2, a1, 8
; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: slli a3, s9, 24
-; RV32I-NEXT: and a4, a4, s6
+; RV32I-NEXT: or a3, t2, a3
+; RV32I-NEXT: srli s4, a1, 24
+; RV32I-NEXT: and a5, a5, ra
+; RV32I-NEXT: or t2, a5, s3
+; RV32I-NEXT: and s3, a1, ra
+; RV32I-NEXT: slli s5, a1, 24
+; RV32I-NEXT: slli a1, t3, 8
; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: and a4, s9, s6
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: and a2, a2, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: and a3, a3, s5
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: and a2, a2, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, s4
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srli a2, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, s3
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: or s0, a2, a0
-; RV32I-NEXT: or s8, a3, a1
-; RV32I-NEXT: andi a1, s8, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, a0, s1
-; RV32I-NEXT: andi a1, s8, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: andi a1, s8, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: andi a1, s8, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s8, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s8, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: slli a1, a1, 11
-; RV32I-NEXT: sw a1, 280(sp) # 4-byte Folded Spill
-; RV32I-NEXT: and a1, s8, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lui a1, 1024
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: and a1, s8, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lui a1, 131072
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: and a1, s8, a1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s8, a0
-; RV32I-NEXT: lui s8, 524288
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lui a4, 349525
-; RV32I-NEXT: addi a4, a4, 1364
-; RV32I-NEXT: sw a4, 124(sp) # 4-byte Folded Spill
-; RV32I-NEXT: srli a1, s7, 8
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: srli a2, s7, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: and a2, s7, s6
-; RV32I-NEXT: slli s7, s7, 24
-; RV32I-NEXT: slli a2, a2, 8
-; RV32I-NEXT: or a2, s7, a2
-; RV32I-NEXT: srli a3, a0, 8
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: and a3, a3, s6
+; RV32I-NEXT: srli a5, a0, 8
+; RV32I-NEXT: and a4, t5, ra
+; RV32I-NEXT: or t3, a4, s6
+; RV32I-NEXT: srli t5, a0, 24
+; RV32I-NEXT: slli s1, s1, 8
+; RV32I-NEXT: or a4, s7, s1
+; RV32I-NEXT: and a7, a0, ra
+; RV32I-NEXT: slli t4, a0, 24
+; RV32I-NEXT: and a0, s8, ra
+; RV32I-NEXT: or a0, a0, t6
+; RV32I-NEXT: srli s1, t1, 8
+; RV32I-NEXT: slli s0, s0, 8
+; RV32I-NEXT: or t6, a6, s0
+; RV32I-NEXT: srli a6, t1, 24
+; RV32I-NEXT: and s0, s2, ra
+; RV32I-NEXT: or s0, s0, s4
+; RV32I-NEXT: and s7, t1, ra
+; RV32I-NEXT: slli s6, t1, 24
+; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or t1, s5, s3
+; RV32I-NEXT: srli s2, t0, 8
+; RV32I-NEXT: and a5, a5, ra
+; RV32I-NEXT: or a5, a5, t5
+; RV32I-NEXT: srli t5, t0, 24
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a7, t4, a7
+; RV32I-NEXT: and t4, t0, ra
+; RV32I-NEXT: slli t0, t0, 24
+; RV32I-NEXT: and s1, s1, ra
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: and s2, s2, ra
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or a6, s1, a6
+; RV32I-NEXT: or s1, s6, s7
+; RV32I-NEXT: or t5, s2, t5
+; RV32I-NEXT: or t0, t0, t4
; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: slli a3, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: and a2, a3, s5
+; RV32I-NEXT: or a1, a1, t2
+; RV32I-NEXT: or a3, a4, t3
+; RV32I-NEXT: or a0, t6, a0
+; RV32I-NEXT: or a4, t1, s0
+; RV32I-NEXT: or a5, a7, a5
+; RV32I-NEXT: or a6, s1, a6
+; RV32I-NEXT: or a7, t0, t5
+; RV32I-NEXT: srli t0, a2, 4
+; RV32I-NEXT: sw s11, 744(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a2, a2, s11
+; RV32I-NEXT: srli t1, a1, 4
+; RV32I-NEXT: and a1, a1, s11
+; RV32I-NEXT: srli t2, a3, 4
+; RV32I-NEXT: and a3, a3, s11
+; RV32I-NEXT: srli t3, a0, 4
+; RV32I-NEXT: and a0, a0, s11
+; RV32I-NEXT: srli t4, a4, 4
+; RV32I-NEXT: and a4, a4, s11
+; RV32I-NEXT: srli t5, a5, 4
+; RV32I-NEXT: and a5, a5, s11
+; RV32I-NEXT: srli t6, a6, 4
+; RV32I-NEXT: and a6, a6, s11
+; RV32I-NEXT: srli s0, a7, 4
+; RV32I-NEXT: and a7, a7, s11
+; RV32I-NEXT: and t0, t0, s11
+; RV32I-NEXT: slli a2, a2, 4
+; RV32I-NEXT: and t1, t1, s11
; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: srli a3, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: and a2, a3, s5
+; RV32I-NEXT: and t2, t2, s11
+; RV32I-NEXT: slli a3, a3, 4
+; RV32I-NEXT: and t3, t3, s11
; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: and a2, a3, s4
+; RV32I-NEXT: and t4, t4, s11
+; RV32I-NEXT: slli a4, a4, 4
+; RV32I-NEXT: and t5, t5, s11
+; RV32I-NEXT: slli a5, a5, 4
+; RV32I-NEXT: and t6, t6, s11
+; RV32I-NEXT: slli a6, a6, 4
+; RV32I-NEXT: and s0, s0, s11
+; RV32I-NEXT: slli a7, a7, 4
+; RV32I-NEXT: or a2, t0, a2
+; RV32I-NEXT: or a1, t1, a1
+; RV32I-NEXT: or a3, t2, a3
+; RV32I-NEXT: or a0, t3, a0
+; RV32I-NEXT: or a4, t4, a4
+; RV32I-NEXT: or a5, t5, a5
+; RV32I-NEXT: or a6, t6, a6
+; RV32I-NEXT: or a7, s0, a7
+; RV32I-NEXT: srli t0, a2, 2
+; RV32I-NEXT: sw s10, 724(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a2, a2, s10
+; RV32I-NEXT: srli t1, a1, 2
+; RV32I-NEXT: and a1, a1, s10
+; RV32I-NEXT: srli t2, a3, 2
+; RV32I-NEXT: and a3, a3, s10
+; RV32I-NEXT: srli t3, a0, 2
+; RV32I-NEXT: and a0, a0, s10
+; RV32I-NEXT: srli t4, a4, 2
+; RV32I-NEXT: and a4, a4, s10
+; RV32I-NEXT: srli t5, a5, 2
+; RV32I-NEXT: and a5, a5, s10
+; RV32I-NEXT: srli t6, a6, 2
+; RV32I-NEXT: and a6, a6, s10
+; RV32I-NEXT: srli s0, a7, 2
+; RV32I-NEXT: and a7, a7, s10
+; RV32I-NEXT: and t0, t0, s10
+; RV32I-NEXT: slli a2, a2, 2
+; RV32I-NEXT: and t1, t1, s10
; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: srli a3, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: and a2, a3, s4
+; RV32I-NEXT: and t2, t2, s10
+; RV32I-NEXT: slli a3, a3, 2
+; RV32I-NEXT: and t3, t3, s10
; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: and a2, a3, s3
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srli a3, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: or s0, a2, a1
-; RV32I-NEXT: and a1, a3, a4
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: sw a0, 120(sp) # 4-byte Folded Spill
-; RV32I-NEXT: andi a1, s9, 2
-; RV32I-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s9, 1
-; RV32I-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s7, a0, s1
-; RV32I-NEXT: andi a1, s9, 4
-; RV32I-NEXT: sw a1, 256(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s9, 8
-; RV32I-NEXT: sw a1, 252(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s7, s7, a0
-; RV32I-NEXT: andi a1, s9, 16
-; RV32I-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s9, 32
+; RV32I-NEXT: and t4, t4, s10
+; RV32I-NEXT: slli a4, a4, 2
+; RV32I-NEXT: and t5, t5, s10
+; RV32I-NEXT: slli a5, a5, 2
+; RV32I-NEXT: and t6, t6, s10
+; RV32I-NEXT: slli a6, a6, 2
+; RV32I-NEXT: and s0, s0, s10
+; RV32I-NEXT: slli a7, a7, 2
+; RV32I-NEXT: or a2, t0, a2
+; RV32I-NEXT: or a1, t1, a1
+; RV32I-NEXT: or a3, t2, a3
+; RV32I-NEXT: or a0, t3, a0
+; RV32I-NEXT: or a4, t4, a4
+; RV32I-NEXT: or a5, t5, a5
+; RV32I-NEXT: or a6, t6, a6
+; RV32I-NEXT: or a7, s0, a7
+; RV32I-NEXT: srli t0, a2, 1
+; RV32I-NEXT: sw s9, 728(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a2, a2, s9
+; RV32I-NEXT: srli t1, a1, 1
+; RV32I-NEXT: and a1, a1, s9
+; RV32I-NEXT: srli t2, a3, 1
+; RV32I-NEXT: and a3, a3, s9
+; RV32I-NEXT: srli t3, a0, 1
+; RV32I-NEXT: and a0, a0, s9
+; RV32I-NEXT: srli t4, a4, 1
+; RV32I-NEXT: and a4, a4, s9
+; RV32I-NEXT: srli t5, a5, 1
+; RV32I-NEXT: and a5, a5, s9
+; RV32I-NEXT: srli t6, a6, 1
+; RV32I-NEXT: and a6, a6, s9
+; RV32I-NEXT: srli s0, a7, 1
+; RV32I-NEXT: and a7, a7, s9
+; RV32I-NEXT: and t0, t0, s9
+; RV32I-NEXT: slli a2, a2, 1
+; RV32I-NEXT: and t1, t1, s9
+; RV32I-NEXT: slli s1, a1, 1
+; RV32I-NEXT: and t2, t2, s9
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: and t3, t3, s9
+; RV32I-NEXT: slli s2, a0, 1
+; RV32I-NEXT: and t4, t4, s9
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: and t5, t5, s9
+; RV32I-NEXT: slli s3, a5, 1
+; RV32I-NEXT: and s4, t6, s9
+; RV32I-NEXT: slli a6, a6, 1
+; RV32I-NEXT: and s5, s0, s9
+; RV32I-NEXT: slli s0, a7, 1
+; RV32I-NEXT: or a0, t0, a2
+; RV32I-NEXT: or a1, t1, s1
+; RV32I-NEXT: or a2, t2, a3
+; RV32I-NEXT: srli t6, s1, 31
+; RV32I-NEXT: or a3, t3, s2
+; RV32I-NEXT: srli a5, s2, 31
+; RV32I-NEXT: or t0, t4, a4
+; RV32I-NEXT: or t3, t5, s3
+; RV32I-NEXT: or t1, s4, a6
+; RV32I-NEXT: srli a4, s3, 31
+; RV32I-NEXT: or s6, s5, s0
+; RV32I-NEXT: srli t2, s0, 31
+; RV32I-NEXT: srli a6, a0, 8
+; RV32I-NEXT: srli a7, a0, 24
+; RV32I-NEXT: srli t4, a1, 8
+; RV32I-NEXT: srli t5, a1, 24
+; RV32I-NEXT: slli s1, a1, 24
+; RV32I-NEXT: and s2, a1, ra
+; RV32I-NEXT: slli s3, a2, 31
+; RV32I-NEXT: seqz s4, t6
+; RV32I-NEXT: srli t6, t0, 8
+; RV32I-NEXT: and a6, a6, ra
+; RV32I-NEXT: or s0, a6, a7
+; RV32I-NEXT: srli a7, t0, 24
+; RV32I-NEXT: and a6, t4, ra
+; RV32I-NEXT: or a6, a6, t5
+; RV32I-NEXT: srli t5, t3, 8
+; RV32I-NEXT: slli s2, s2, 8
+; RV32I-NEXT: or t4, s1, s2
+; RV32I-NEXT: srli s1, t3, 24
+; RV32I-NEXT: and t6, t6, ra
+; RV32I-NEXT: or a7, t6, a7
+; RV32I-NEXT: slli t6, t3, 24
+; RV32I-NEXT: and t5, t5, ra
+; RV32I-NEXT: or t5, t5, s1
+; RV32I-NEXT: and s1, t3, ra
+; RV32I-NEXT: slli s1, s1, 8
+; RV32I-NEXT: or s1, t6, s1
+; RV32I-NEXT: slli t6, a0, 31
+; RV32I-NEXT: seqz s2, a5
+; RV32I-NEXT: addi a5, s4, -1
+; RV32I-NEXT: addi s2, s2, -1
+; RV32I-NEXT: and s3, a5, s3
+; RV32I-NEXT: sw s3, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and s2, s2, t6
+; RV32I-NEXT: sw s2, 720(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a5, a5, t6
+; RV32I-NEXT: sw a5, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, t1, 31
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: seqz t6, t2
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi t6, t6, -1
+; RV32I-NEXT: and a5, a4, a5
+; RV32I-NEXT: sw a5, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, t0, 31
+; RV32I-NEXT: and t2, t6, a5
+; RV32I-NEXT: sw t2, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: sw a4, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw ra, 740(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a0, ra
+; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: slli t6, a0, 24
+; RV32I-NEXT: or a4, t6, a4
+; RV32I-NEXT: or a4, a4, s0
+; RV32I-NEXT: sw a4, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a4, t4, a6
+; RV32I-NEXT: sw a4, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, t0, ra
+; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: slli s3, t0, 24
+; RV32I-NEXT: or a4, s3, a4
+; RV32I-NEXT: or a4, a4, a7
+; RV32I-NEXT: sw a4, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a4, s1, t5
+; RV32I-NEXT: sw a4, 700(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 2
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 2
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 1
+; RV32I-NEXT: and s4, a4, a6
+; RV32I-NEXT: slli a6, a0, 1
+; RV32I-NEXT: and s2, a5, a6
+; RV32I-NEXT: and t5, a4, a6
+; RV32I-NEXT: andi a4, a1, 4
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 4
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 2
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 2
+; RV32I-NEXT: and s7, a5, a6
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 8
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 8
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 3
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 3
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 668(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 16
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 16
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 4
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 4
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 652(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 32
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 32
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 5
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 5
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 64
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 64
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 6
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 6
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 128
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 128
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 7
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 7
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 256
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 256
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 8
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 8
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 576(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 512
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 512
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 9
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 9
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a4, a1, 1024
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 1024
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 10
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 10
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a4, 1
+; RV32I-NEXT: slli a7, a4, 11
+; RV32I-NEXT: and a4, a1, a7
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a7
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 11
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 488(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 11
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 500(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 1
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 12
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 476(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 12
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 484(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 508(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 2
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 13
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 512(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 13
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 516(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 560(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 4
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 14
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 14
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 580(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 8
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 15
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 15
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 16
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 16
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 412(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 16
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 416(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 448(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 32
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 17
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 396(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 17
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 404(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 420(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 64
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 18
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 436(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 18
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 444(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 480(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 128
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 19
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 496(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 19
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 504(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 524(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 256
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 20
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 528(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 20
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 532(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 512
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 21
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 21
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 564(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 1024
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 22
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 368(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 22
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 372(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 384(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 2048
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 23
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 360(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 23
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 364(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 376(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 4096
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 24
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 380(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a5, a5, t6
+; RV32I-NEXT: sw a5, 388(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, t6
+; RV32I-NEXT: sw a4, 392(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 8192
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 25
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 400(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 25
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 408(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 424(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 16384
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 26
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 428(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 26
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 432(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 460(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 32768
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 27
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 452(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 27
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 456(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 464(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 65536
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 28
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 472(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 28
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 468(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sw a4, 492(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 131072
+; RV32I-NEXT: and a4, a1, a5
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: and a5, a3, a5
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 29
+; RV32I-NEXT: and a6, a4, a6
+; RV32I-NEXT: sw a6, 348(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, a0, 29
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sw a5, 352(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and t6, a4, a6
+; RV32I-NEXT: lui a5, 262144
+; RV32I-NEXT: andi a4, a1, 1
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: lui t2, 262144
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: andi a5, a3, 1
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, a2, 30
+; RV32I-NEXT: and a2, a4, a2
+; RV32I-NEXT: sw a2, 308(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a5, a5, a0
+; RV32I-NEXT: sw a5, 332(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a4, a4, a0
+; RV32I-NEXT: sw a4, 356(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a0, a0, 30
+; RV32I-NEXT: and a2, a3, t2
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a3, a1, a6
+; RV32I-NEXT: sw a3, 304(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a2, a2, a0
+; RV32I-NEXT: sw a2, 320(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a3, a1, a0
+; RV32I-NEXT: andi a0, t3, 2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 2
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 1
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 300(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 1
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 312(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 4
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 2
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 280(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 2
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 288(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 8
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 8
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 3
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 260(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 3
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 16
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 16
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 4
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 236(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 4
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s9, 64
-; RV32I-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s7, s7, a0
-; RV32I-NEXT: andi a1, s9, 128
-; RV32I-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s9, 256
-; RV32I-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s9, 512
-; RV32I-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s9, 1024
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 276(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 32
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 32
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 5
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 212(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 5
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s7, s7, a0
-; RV32I-NEXT: lw s11, 280(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, s9, s11
-; RV32I-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 204(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s7, s7, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s9, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 64
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 64
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 6
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 284(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 6
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 292(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 128
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 128
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 7
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 192(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 7
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 196(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 188(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 180(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s7, s7, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 176(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s9, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 256
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 256
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 8
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 164(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 8
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sw a1, 172(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s9, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 204(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 512
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 512
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 9
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 216(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 9
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 228(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 1024
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: andi a1, s6, 1024
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 10
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 316(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 10
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 324(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a7, 440(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, a7
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a7
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 11
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 112(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 11
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 12
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 104(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 12
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 2
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 13
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 148(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 13
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 156(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 180(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 4
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 14
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 220(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 14
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 8
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 15
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 256(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 15
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli s0, t1, 16
+; RV32I-NEXT: and s0, a0, s0
+; RV32I-NEXT: slli a2, t0, 16
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 32
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 17
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 17
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 64
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 18
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 18
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 128
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 19
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 136(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 19
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 152(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 256
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 20
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 160(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 20
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sw a1, 168(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 176(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 512
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 21
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 184(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a2, t0, 21
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sw a1, 188(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 1024
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a2, t1, 22
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sw a2, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a5, t0, 22
+; RV32I-NEXT: and a2, a1, a5
+; RV32I-NEXT: and a0, a0, a5
+; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a1, 2048
+; RV32I-NEXT: and a0, t3, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a1, s6, a1
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a4, a0, -1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a0, t1, 23
+; RV32I-NEXT: and a0, a4, a0
+; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a6, t0, 23
+; RV32I-NEXT: and a5, a1, a6
+; RV32I-NEXT: and a0, a4, a6
+; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a0, 4096
+; RV32I-NEXT: and a6, t3, a0
+; RV32I-NEXT: seqz a6, a6
+; RV32I-NEXT: and a7, s6, a0
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: addi t4, a6, -1
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: slli a6, t1, 24
+; RV32I-NEXT: and a0, t4, a6
+; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, a7, s3
+; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t4, s3
+; RV32I-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 164(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and s1, t3, a0
+; RV32I-NEXT: seqz s1, s1
+; RV32I-NEXT: and s3, s6, a0
+; RV32I-NEXT: seqz s3, s3
+; RV32I-NEXT: addi s5, s1, -1
+; RV32I-NEXT: addi s3, s3, -1
+; RV32I-NEXT: slli s1, t1, 25
+; RV32I-NEXT: and a0, s5, s1
+; RV32I-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s8, t0, 25
+; RV32I-NEXT: and a0, s3, s8
+; RV32I-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s5, s8
+; RV32I-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 160(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and s5, t3, a0
+; RV32I-NEXT: seqz s5, s5
+; RV32I-NEXT: and s8, s6, a0
+; RV32I-NEXT: seqz s8, s8
+; RV32I-NEXT: addi s9, s5, -1
+; RV32I-NEXT: addi s8, s8, -1
+; RV32I-NEXT: slli s5, t1, 26
+; RV32I-NEXT: and a0, s9, s5
+; RV32I-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s10, t0, 26
+; RV32I-NEXT: and a0, s8, s10
+; RV32I-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s9, s10
+; RV32I-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 156(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: and s9, t3, a0
+; RV32I-NEXT: seqz s9, s9
+; RV32I-NEXT: and s10, s6, a0
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: addi s9, s9, -1
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: slli s11, t1, 27
+; RV32I-NEXT: and a0, s9, s11
+; RV32I-NEXT: sw a0, 88(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s11, t0, 27
+; RV32I-NEXT: and a0, s10, s11
+; RV32I-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s9, s11
+; RV32I-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 152(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s7, s7, a0
+; RV32I-NEXT: and s9, t3, a0
+; RV32I-NEXT: seqz s9, s9
+; RV32I-NEXT: and s10, s6, a0
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: addi s9, s9, -1
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: slli s11, t1, 28
+; RV32I-NEXT: and a0, s9, s11
+; RV32I-NEXT: sw a0, 120(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli s11, t0, 28
+; RV32I-NEXT: and a0, s10, s11
+; RV32I-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, s9, s11
+; RV32I-NEXT: sw a0, 124(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 148(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: and s9, t3, a0
+; RV32I-NEXT: seqz s9, s9
+; RV32I-NEXT: and s10, s6, a0
+; RV32I-NEXT: seqz s10, s10
+; RV32I-NEXT: addi s9, s9, -1
+; RV32I-NEXT: addi s10, s10, -1
+; RV32I-NEXT: slli s11, t1, 29
+; RV32I-NEXT: and s11, s9, s11
+; RV32I-NEXT: slli ra, t0, 29
+; RV32I-NEXT: and a4, s10, ra
+; RV32I-NEXT: and a0, s9, ra
+; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi ra, t3, 1
; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: and a1, s9, s8
-; RV32I-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lui s9, 524288
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: srli a1, s10, 8
-; RV32I-NEXT: xor s1, s7, a0
-; RV32I-NEXT: srli a0, s10, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: and a1, s10, s6
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: slli a1, a1, 8
-; RV32I-NEXT: or a1, s10, a1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a1, a1, s5
+; RV32I-NEXT: and t3, t3, a0
+; RV32I-NEXT: seqz ra, ra
+; RV32I-NEXT: andi s8, s6, 1
+; RV32I-NEXT: seqz s8, s8
+; RV32I-NEXT: addi ra, ra, -1
+; RV32I-NEXT: addi s8, s8, -1
+; RV32I-NEXT: slli t2, t1, 30
+; RV32I-NEXT: and t1, ra, t1
+; RV32I-NEXT: and s8, s8, t0
+; RV32I-NEXT: and a6, ra, t0
+; RV32I-NEXT: slli t0, t0, 30
+; RV32I-NEXT: and s6, s6, a0
+; RV32I-NEXT: seqz t3, t3
+; RV32I-NEXT: seqz s6, s6
+; RV32I-NEXT: addi t3, t3, -1
+; RV32I-NEXT: addi s6, s6, -1
+; RV32I-NEXT: and t2, t3, t2
+; RV32I-NEXT: and s6, s6, t0
+; RV32I-NEXT: and a0, t3, t0
+; RV32I-NEXT: sw a0, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, s4
+; RV32I-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 368(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 332(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, s2
+; RV32I-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, s7, a0
+; RV32I-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 372(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 320(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 300(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, t1, a0
+; RV32I-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 164(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, s0, a0
+; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, a0, a1
+; RV32I-NEXT: xor s11, s11, t2
+; RV32I-NEXT: lw a0, 312(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, s8, a0
+; RV32I-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 268(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, a0, a1
+; RV32I-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, a0, a1
+; RV32I-NEXT: lw a0, 200(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 172(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s7, a0, a1
+; RV32I-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, a0, s0
+; RV32I-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, a0, s1
+; RV32I-NEXT: xor s3, a2, a5
+; RV32I-NEXT: xor s6, a4, s6
+; RV32I-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, a0, t5
+; RV32I-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a0, a1
+; RV32I-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a0, a1
+; RV32I-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a0, a1
+; RV32I-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, a0, a1
+; RV32I-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 420(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, a0, a1
+; RV32I-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, a0, a1
+; RV32I-NEXT: xor t6, t6, a3
+; RV32I-NEXT: lw a0, 344(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a6, a0
+; RV32I-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 296(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 276(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 252(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw a2, 208(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 204(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: lw a3, 144(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a4, 132(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: lw a4, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a6
+; RV32I-NEXT: lw a6, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, s8
+; RV32I-NEXT: lw s8, 308(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s8, s9
+; RV32I-NEXT: sw s8, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 512(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 576(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 436(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 488(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 380(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 476(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 412(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 396(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s8, s9
+; RV32I-NEXT: sw s8, 512(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 508(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 500(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 516(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 516(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 444(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 484(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 488(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 388(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 416(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 484(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 720(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 404(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 720(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 368(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s8, s9
+; RV32I-NEXT: sw s8, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 284(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 364(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 668(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 216(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 360(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: sw s8, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 148(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 352(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, s9, s8
+; RV32I-NEXT: lw s8, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, s10, s8
+; RV32I-NEXT: lw s8, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, ra, s8
+; RV32I-NEXT: sw s8, 652(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s11, s8
+; RV32I-NEXT: sw s8, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s8, 372(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s8, s2
+; RV32I-NEXT: lw s2, 292(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s5, s2
+; RV32I-NEXT: sw s2, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s2, 228(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s7, s2
+; RV32I-NEXT: sw s2, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s2, 156(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s0, s2
+; RV32I-NEXT: sw s0, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 80(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s1, s0
+; RV32I-NEXT: sw s0, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s3, s0
+; RV32I-NEXT: sw s0, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw s0, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s0, s6, s0
+; RV32I-NEXT: sw s0, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor t0, s4, t0
+; RV32I-NEXT: sw t0, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t1, t0
+; RV32I-NEXT: lw t0, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t2, t0
+; RV32I-NEXT: sw t0, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 560(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t3, t0
+; RV32I-NEXT: sw t0, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 480(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t4, t0
+; RV32I-NEXT: sw t0, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 392(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t5, t0
+; RV32I-NEXT: sw t0, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t6, t0
+; RV32I-NEXT: sw t0, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor a0, a7, a0
+; RV32I-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 328(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a0
+; RV32I-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a4, a0
+; RV32I-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a5, a0
+; RV32I-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a6, a0
+; RV32I-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a5, a0, 4
+; RV32I-NEXT: lw a1, 744(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a6, a0, a1
+; RV32I-NEXT: and a5, a5, a1
+; RV32I-NEXT: slli a6, a6, 4
+; RV32I-NEXT: or a0, a5, a6
+; RV32I-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, a0, 4
+; RV32I-NEXT: and a7, a0, a1
+; RV32I-NEXT: and a6, a6, a1
+; RV32I-NEXT: slli a7, a7, 4
+; RV32I-NEXT: or a0, a6, a7
+; RV32I-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, a2, a0
+; RV32I-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 576(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 400(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 508(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 500(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, a2, a0
+; RV32I-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 516(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 488(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 484(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli t2, a0, 4
+; RV32I-NEXT: and t3, a0, a1
+; RV32I-NEXT: and t2, t2, a1
+; RV32I-NEXT: slli t3, t3, 4
+; RV32I-NEXT: or a0, t2, t3
+; RV32I-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli t3, a0, 4
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: and t3, t3, a1
; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or s7, a1, a0
-; RV32I-NEXT: andi a1, s7, 2
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s8, a0, s0
-; RV32I-NEXT: andi a1, s7, 4
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 8
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: andi a1, s7, 16
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 32
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 64
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: andi a1, s7, 128
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s7, 256
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 512
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s7, 1024
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: and a1, s7, s11
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: or a0, t3, a0
+; RV32I-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sw a0, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 316(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, a1, a0
+; RV32I-NEXT: lw a0, 220(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s9, a0
+; RV32I-NEXT: lw a0, 136(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s10, a0
+; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 652(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, a1, a0
+; RV32I-NEXT: lw a0, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, s8, a0
+; RV32I-NEXT: lw a0, 324(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 640(sp) # 4-byte Folded Reload
; RV32I-NEXT: xor s0, s0, a0
+; RV32I-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, s1, a0
+; RV32I-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, a1, a0
+; RV32I-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, a1, a0
+; RV32I-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, a0, t1
+; RV32I-NEXT: lw a0, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t2, a1, a0
+; RV32I-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, a1, a0
+; RV32I-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a1, a0
+; RV32I-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a1, a0
+; RV32I-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a0, a1
+; RV32I-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a4, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, a0
+; RV32I-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a3, a0
+; RV32I-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a1, a0
+; RV32I-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a0
+; RV32I-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor ra, a0, ra
+; RV32I-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: lw a2, 428(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 576(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, s8, a2
+; RV32I-NEXT: lw s8, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s11, s8, s11
+; RV32I-NEXT: lw s8, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s8, s9, s8
+; RV32I-NEXT: lw s9, 532(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 580(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, s10, s9
+; RV32I-NEXT: lw s10, 432(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s10, s7, s10
+; RV32I-NEXT: lw s7, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s5, s7, s5
+; RV32I-NEXT: lw s7, 256(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s6, s6, s7
+; RV32I-NEXT: lw s7, 160(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s2, s7
+; RV32I-NEXT: lw s7, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s3, s7
+; RV32I-NEXT: xor s0, s4, s0
+; RV32I-NEXT: lw s4, 264(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s1, s1, s4
+; RV32I-NEXT: lw s4, 168(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, s4
+; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, s4
+; RV32I-NEXT: xor t2, t6, t2
+; RV32I-NEXT: lw t6, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, t6
+; RV32I-NEXT: lw t6, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, t6
+; RV32I-NEXT: lw t6, 460(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, t6
+; RV32I-NEXT: xor a4, t1, a4
+; RV32I-NEXT: lw t1, 272(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t1
+; RV32I-NEXT: lw t1, 176(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a6, t1
+; RV32I-NEXT: lw t1, 84(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, t1
+; RV32I-NEXT: xor a0, ra, a0
+; RV32I-NEXT: lw t1, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, t1
+; RV32I-NEXT: lw t1, 452(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, t1
+; RV32I-NEXT: xor t1, s11, s8
+; RV32I-NEXT: lw t6, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t6, s9, t6
+; RV32I-NEXT: lw s4, 456(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s4, s10, s4
+; RV32I-NEXT: xor s5, s5, s6
+; RV32I-NEXT: lw s6, 184(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s2, s2, s6
+; RV32I-NEXT: lw s6, 88(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s3, s6
+; RV32I-NEXT: xor s0, s0, s1
+; RV32I-NEXT: lw s1, 188(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, t4, s1
+; RV32I-NEXT: lw s1, 92(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, s1
+; RV32I-NEXT: xor t2, t2, t3
+; RV32I-NEXT: lw t3, 564(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, t3
+; RV32I-NEXT: lw t3, 464(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, t3
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: lw a5, 196(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, a6, a5
+; RV32I-NEXT: lw a5, 96(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a5
+; RV32I-NEXT: lw a6, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a5, a6, 2
+; RV32I-NEXT: lw s6, 724(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a6, a6, s6
+; RV32I-NEXT: and a5, a5, s6
+; RV32I-NEXT: slli a6, a6, 2
+; RV32I-NEXT: or s1, a5, a6
+; RV32I-NEXT: lw a6, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a5, a6, 2
+; RV32I-NEXT: and a6, a6, s6
+; RV32I-NEXT: and a5, a5, s6
+; RV32I-NEXT: slli a6, a6, 2
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 472(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a2, a1
+; RV32I-NEXT: xor a2, t1, t6
+; RV32I-NEXT: lw a6, 468(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, s4, a6
+; RV32I-NEXT: lw t6, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, t6, 2
+; RV32I-NEXT: and t6, t6, s6
+; RV32I-NEXT: and a6, a6, s6
+; RV32I-NEXT: slli t6, t6, 2
+; RV32I-NEXT: or t6, a6, t6
+; RV32I-NEXT: lw s4, 700(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli a6, s4, 2
+; RV32I-NEXT: and s4, s4, s6
+; RV32I-NEXT: and a6, a6, s6
+; RV32I-NEXT: slli s4, s4, 2
+; RV32I-NEXT: or a6, a6, s4
+; RV32I-NEXT: xor s2, s5, s2
+; RV32I-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s3, s3, s4
+; RV32I-NEXT: xor t4, s0, t4
+; RV32I-NEXT: lw s0, 116(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, t5, s0
+; RV32I-NEXT: xor a7, t2, a7
+; RV32I-NEXT: lw t2, 492(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, t2
+; RV32I-NEXT: xor a4, a4, t3
+; RV32I-NEXT: lw t2, 124(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, t2
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: xor a1, a2, t1
+; RV32I-NEXT: xor a2, s2, s3
+; RV32I-NEXT: xor t1, t4, t5
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: xor a3, a4, a3
+; RV32I-NEXT: lw a4, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a4
+; RV32I-NEXT: lw a4, 720(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a1, a4
+; RV32I-NEXT: lw a1, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a1
+; RV32I-NEXT: lw a1, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t1, a1
+; RV32I-NEXT: lw a1, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a7, a1
+; RV32I-NEXT: lw a1, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a3, a1
+; RV32I-NEXT: srli a1, s1, 1
+; RV32I-NEXT: lw s2, 728(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a3, s1, s2
+; RV32I-NEXT: and a1, a1, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: xor a0, a4, a0
+; RV32I-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a0, t6, 1
+; RV32I-NEXT: and a3, t6, s2
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or t5, a0, a3
+; RV32I-NEXT: xor a0, t0, a2
+; RV32I-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a0, a7, 8
+; RV32I-NEXT: lw a3, 740(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: srli a2, a7, 24
+; RV32I-NEXT: or a2, a0, a2
+; RV32I-NEXT: slli a0, a7, 24
+; RV32I-NEXT: and a4, a7, a3
+; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: or a4, a0, a4
+; RV32I-NEXT: srli a0, t1, 8
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: srli a7, t1, 24
+; RV32I-NEXT: or a7, a0, a7
+; RV32I-NEXT: slli a0, t1, 24
+; RV32I-NEXT: and t0, t1, a3
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or t0, a0, t0
+; RV32I-NEXT: srli a0, a5, 1
+; RV32I-NEXT: and a5, a5, s2
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: or a0, a0, a5
+; RV32I-NEXT: srli a5, a5, 31
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli t1, a1, 31
+; RV32I-NEXT: and a3, a5, t1
+; RV32I-NEXT: sw a3, 708(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a5, a6, 1
+; RV32I-NEXT: and a6, a6, s2
+; RV32I-NEXT: and a5, a5, s2
+; RV32I-NEXT: slli a6, a6, 1
+; RV32I-NEXT: or t3, a5, a6
+; RV32I-NEXT: srli a5, a6, 31
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, t5, 31
+; RV32I-NEXT: and a3, a5, a6
+; RV32I-NEXT: sw a3, 704(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a2, a4, a2
+; RV32I-NEXT: sw a2, 700(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a2, t0, a7
+; RV32I-NEXT: sw a2, 712(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 1
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 696(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 4
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 2
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 688(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 8
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 3
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 680(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 16
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 4
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 676(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 32
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 5
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 672(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 64
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 6
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 684(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 128
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 7
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 656(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 256
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 8
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 652(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 512
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 9
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 664(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 1024
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 10
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 692(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a3, 440(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a2, a0, a3
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 11
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 636(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a5, 1
+; RV32I-NEXT: and a2, a0, a5
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 12
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 628(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a6, 2
+; RV32I-NEXT: and a2, a0, a6
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 13
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 644(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a7, 4
+; RV32I-NEXT: and a2, a0, a7
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 14
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 660(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s3, 8
+; RV32I-NEXT: and a2, a0, s3
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 15
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 668(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t1, 16
+; RV32I-NEXT: and a2, a0, t1
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 16
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 612(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t2, 32
+; RV32I-NEXT: and a2, a0, t2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 17
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 604(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t4, 64
+; RV32I-NEXT: and a2, a0, t4
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 18
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 620(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui t6, 128
+; RV32I-NEXT: and a2, a0, t6
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 19
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 632(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s0, 256
+; RV32I-NEXT: and a2, a0, s0
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 20
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 640(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s4, 512
+; RV32I-NEXT: and a2, a0, s4
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 21
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 648(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s5, 1024
+; RV32I-NEXT: and a2, a0, s5
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 22
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 592(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s7, 2048
+; RV32I-NEXT: and a2, a0, s7
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 23
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 588(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui s11, 4096
+; RV32I-NEXT: and a2, a0, s11
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 24
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 596(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 8192
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 25
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 16384
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 26
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 608(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 32768
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 27
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 65536
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 28
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 624(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a2, 131072
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: slli a4, a1, 29
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: sw a2, 584(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a2, a0, 1
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a2, a2, a1
+; RV32I-NEXT: sw a2, 576(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a1, a1, 30
+; RV32I-NEXT: lui t0, 262144
+; RV32I-NEXT: and a0, a0, t0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 2
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 8
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 3
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 16
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 4
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 32
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 5
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 64
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 6
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 128
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 7
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 256
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 8
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 512
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 9
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, t3, 1024
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 10
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, a3
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 11
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, a5
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 12
+; RV32I-NEXT: and s9, a0, a1
+; RV32I-NEXT: and a0, t3, a6
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli s1, t5, 13
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, a7
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 14
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, s3
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 15
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, t1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 16
+; RV32I-NEXT: and s8, a0, a1
+; RV32I-NEXT: and a0, t3, t2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a2, t5, 17
+; RV32I-NEXT: and s3, a0, a2
+; RV32I-NEXT: and a0, t3, t4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 18
+; RV32I-NEXT: and s10, a0, a1
+; RV32I-NEXT: and a0, t3, t6
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 19
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, s0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 20
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, s4
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli s0, t5, 21
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32I-NEXT: and a0, t3, s5
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: slli a1, t5, 22
+; RV32I-NEXT: and s0, a0, a1
+; RV32I-NEXT: and a1, t3, s7
+; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: slli a4, t5, 23
+; RV32I-NEXT: and t6, a1, a4
+; RV32I-NEXT: and a4, t3, s11
+; RV32I-NEXT: seqz a4, a4
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: slli a5, t5, 24
+; RV32I-NEXT: and s1, a4, a5
; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
+; RV32I-NEXT: and a5, t3, a0
+; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: slli a6, t5, 25
+; RV32I-NEXT: and s4, a5, a6
; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
+; RV32I-NEXT: and a6, t3, a0
+; RV32I-NEXT: seqz a6, a6
+; RV32I-NEXT: addi a6, a6, -1
+; RV32I-NEXT: slli a7, t5, 26
+; RV32I-NEXT: and s5, a6, a7
; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
+; RV32I-NEXT: and a7, t3, a0
+; RV32I-NEXT: seqz a7, a7
+; RV32I-NEXT: addi a7, a7, -1
+; RV32I-NEXT: slli t2, t5, 27
+; RV32I-NEXT: and s7, a7, t2
; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s8, s8, a0
+; RV32I-NEXT: and t2, t3, a0
+; RV32I-NEXT: seqz t2, t2
+; RV32I-NEXT: addi t2, t2, -1
+; RV32I-NEXT: slli a3, t5, 28
+; RV32I-NEXT: and s11, t2, a3
; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: and a1, s7, s9
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: lw a4, 128(sp) # 4-byte Folded Reload
-; RV32I-NEXT: srli a1, a4, 8
-; RV32I-NEXT: srli a2, a4, 24
-; RV32I-NEXT: xor a0, s8, a0
-; RV32I-NEXT: and a3, a4, s6
-; RV32I-NEXT: slli a4, a4, 24
-; RV32I-NEXT: xor a0, a0, s1
-; RV32I-NEXT: lw a6, 132(sp) # 4-byte Folded Reload
-; RV32I-NEXT: srli a5, a6, 8
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: srli a2, a6, 24
+; RV32I-NEXT: and a3, t3, a0
+; RV32I-NEXT: seqz a3, a3
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: slli t4, t5, 29
+; RV32I-NEXT: and t2, a3, t4
+; RV32I-NEXT: and t4, t3, t0
+; RV32I-NEXT: andi t3, t3, 1
+; RV32I-NEXT: seqz t3, t3
+; RV32I-NEXT: addi t3, t3, -1
+; RV32I-NEXT: and t3, t3, t5
+; RV32I-NEXT: slli t5, t5, 30
+; RV32I-NEXT: seqz t4, t4
+; RV32I-NEXT: addi t4, t4, -1
+; RV32I-NEXT: and t4, t4, t5
+; RV32I-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t5, a1, a0
+; RV32I-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 680(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a7, a0, a1
+; RV32I-NEXT: lw a0, 676(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, a0, a1
+; RV32I-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 652(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, a0, a1
+; RV32I-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 628(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a0, a3
+; RV32I-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a4, 604(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a0, a4
+; RV32I-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 588(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a0, a5
+; RV32I-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, a0, a1
+; RV32I-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t3, a0
+; RV32I-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 548(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw a2, 532(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw ra, 528(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, ra
+; RV32I-NEXT: lw ra, 516(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor s9, ra, s9
+; RV32I-NEXT: xor s3, s8, s3
+; RV32I-NEXT: xor t6, s0, t6
+; RV32I-NEXT: xor t2, t2, t4
+; RV32I-NEXT: xor a7, t5, a7
+; RV32I-NEXT: lw t4, 684(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t0, t4
+; RV32I-NEXT: lw t4, 664(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t1, t4
+; RV32I-NEXT: lw t4, 644(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t4, a3, t4
+; RV32I-NEXT: lw a3, 620(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, a3
+; RV32I-NEXT: lw a3, 596(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a3
+; RV32I-NEXT: lw a3, 708(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a6, a3
+; RV32I-NEXT: xor a6, t3, a0
+; RV32I-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a1, a1, a0
+; RV32I-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a0
+; RV32I-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, s9, a0
+; RV32I-NEXT: xor t5, s3, s10
+; RV32I-NEXT: xor t6, t6, s1
+; RV32I-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a0, t2, a0
+; RV32I-NEXT: lw s0, 700(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli t2, s0, 4
+; RV32I-NEXT: lw s3, 744(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and s0, s0, s3
+; RV32I-NEXT: and t2, t2, s3
+; RV32I-NEXT: slli s0, s0, 4
+; RV32I-NEXT: or t2, t2, s0
+; RV32I-NEXT: lw s1, 712(sp) # 4-byte Folded Reload
+; RV32I-NEXT: srli s0, s1, 4
+; RV32I-NEXT: and s1, s1, s3
+; RV32I-NEXT: and s0, s0, s3
+; RV32I-NEXT: slli s1, s1, 4
+; RV32I-NEXT: or s0, s0, s1
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 692(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t1, t0
+; RV32I-NEXT: lw t1, 660(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t1, t4, t1
+; RV32I-NEXT: lw t4, 632(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t4
+; RV32I-NEXT: lw t4, 600(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t4
+; RV32I-NEXT: xor a1, a6, a1
+; RV32I-NEXT: lw a6, 564(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a6
+; RV32I-NEXT: lw a6, 536(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, t3, a6
+; RV32I-NEXT: lw t3, 508(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t3, t5, t3
+; RV32I-NEXT: xor t4, t6, s4
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 668(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor t0, t1, t0
+; RV32I-NEXT: lw t1, 640(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t1
+; RV32I-NEXT: lw t1, 608(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t1
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw a2, 544(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a6, a2
+; RV32I-NEXT: lw a6, 512(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a6, t3, a6
+; RV32I-NEXT: xor t1, t4, s5
+; RV32I-NEXT: xor a7, a7, t0
+; RV32I-NEXT: lw t0, 648(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a4, a4, t0
+; RV32I-NEXT: lw t0, 616(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, t0
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw a2, 520(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a6, a2
+; RV32I-NEXT: xor a6, t1, s7
+; RV32I-NEXT: srli t0, t2, 2
+; RV32I-NEXT: and t1, t2, s6
+; RV32I-NEXT: and t0, t0, s6
+; RV32I-NEXT: slli t1, t1, 2
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: srli t1, s0, 2
+; RV32I-NEXT: and t2, s0, s6
+; RV32I-NEXT: and t1, t1, s6
+; RV32I-NEXT: slli t2, t2, 2
+; RV32I-NEXT: or t1, t1, t2
+; RV32I-NEXT: xor a4, a7, a4
+; RV32I-NEXT: lw a7, 624(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a5, a5, a7
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor a2, a6, s11
+; RV32I-NEXT: xor a4, a4, a5
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: srli a2, t0, 1
+; RV32I-NEXT: and a5, t0, s2
+; RV32I-NEXT: xor a3, a4, a3
+; RV32I-NEXT: srli a4, t1, 1
+; RV32I-NEXT: and a6, t1, s2
+; RV32I-NEXT: and a2, a2, s2
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: and a4, a4, s2
+; RV32I-NEXT: slli a6, a6, 1
+; RV32I-NEXT: xor a7, a1, a0
+; RV32I-NEXT: or a1, a2, a5
+; RV32I-NEXT: or a0, a4, a6
+; RV32I-NEXT: srli a2, a3, 8
+; RV32I-NEXT: srli a4, a3, 24
+; RV32I-NEXT: slli a5, a3, 24
+; RV32I-NEXT: lw t0, 740(sp) # 4-byte Folded Reload
+; RV32I-NEXT: and a3, a3, t0
+; RV32I-NEXT: and a2, a2, t0
+; RV32I-NEXT: or a2, a2, a4
+; RV32I-NEXT: srli a4, a7, 8
; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: and a4, a6, s6
-; RV32I-NEXT: slli a6, a6, 24
-; RV32I-NEXT: and a5, a5, s6
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a2, a5, a2
-; RV32I-NEXT: or a4, a6, a4
-; RV32I-NEXT: lw a5, 120(sp) # 4-byte Folded Reload
-; RV32I-NEXT: xor a0, a5, a0
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: or a2, a4, a2
-; RV32I-NEXT: srli a3, a0, 8
-; RV32I-NEXT: srli a4, a0, 24
-; RV32I-NEXT: slli a5, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: srli a6, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: srli a7, a2, 4
-; RV32I-NEXT: and a2, a2, s5
-; RV32I-NEXT: and a3, a3, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: and a6, a6, s5
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: and a7, a7, s5
-; RV32I-NEXT: slli a2, a2, 4
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: or a0, a5, a0
-; RV32I-NEXT: or a1, a6, a1
-; RV32I-NEXT: or a2, a7, a2
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: srli a4, a2, 2
-; RV32I-NEXT: and a2, a2, s4
-; RV32I-NEXT: srli a5, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a3, a3, s4
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: and a4, a4, s4
-; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: and a5, a5, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: or a2, a4, a2
-; RV32I-NEXT: or a0, a5, a0
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: srli a4, a2, 1
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: srli a5, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a3, a3, s3
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: and a4, a4, s3
-; RV32I-NEXT: slli a2, a2, 1
-; RV32I-NEXT: and a5, a5, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or s8, a3, a1
-; RV32I-NEXT: or s9, a4, a2
-; RV32I-NEXT: or a0, a5, a0
-; RV32I-NEXT: srli a1, s8, 8
-; RV32I-NEXT: srli a2, s8, 24
-; RV32I-NEXT: slli a3, s8, 24
-; RV32I-NEXT: and a4, s8, s6
-; RV32I-NEXT: srli a5, s9, 8
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: srli a2, s9, 24
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: slli a4, s9, 24
-; RV32I-NEXT: and a5, a5, s6
-; RV32I-NEXT: or a2, a5, a2
-; RV32I-NEXT: and a5, s9, s6
-; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: srli a5, a7, 24
+; RV32I-NEXT: and a4, a4, t0
; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: srli a5, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: and a5, a5, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a5, a0
-; RV32I-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: or a2, a4, a2
-; RV32I-NEXT: srli a0, a1, 4
-; RV32I-NEXT: and a1, a1, s5
+; RV32I-NEXT: slli a5, a7, 24
+; RV32I-NEXT: and a6, a7, t0
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lui a6, 349525
+; RV32I-NEXT: addi a6, a6, 1364
+; RV32I-NEXT: or a2, a3, a2
+; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: srli a3, a2, 4
-; RV32I-NEXT: and a2, a2, s5
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: and a3, a3, s5
+; RV32I-NEXT: and a2, a2, s3
+; RV32I-NEXT: srli a5, a4, 4
+; RV32I-NEXT: and a4, a4, s3
+; RV32I-NEXT: and a3, a3, s3
; RV32I-NEXT: slli a2, a2, 4
-; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: and a5, a5, s3
+; RV32I-NEXT: slli a4, a4, 4
; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
+; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: srli a3, a2, 2
-; RV32I-NEXT: and a2, a2, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: and a3, a3, s4
+; RV32I-NEXT: and a2, a2, s6
+; RV32I-NEXT: srli a5, a4, 2
+; RV32I-NEXT: and a4, a4, s6
+; RV32I-NEXT: and a3, a3, s6
; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: and a5, a5, s6
+; RV32I-NEXT: slli a4, a4, 2
; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: srli a3, a2, 1
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: and a3, a3, s3
+; RV32I-NEXT: srli a5, a4, 1
+; RV32I-NEXT: and a3, a3, a6
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: and a2, a2, s2
; RV32I-NEXT: slli a2, a2, 1
-; RV32I-NEXT: or s0, a1, a0
-; RV32I-NEXT: or s7, a3, a2
-; RV32I-NEXT: andi a1, s7, 2
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s10, a0, s1
-; RV32I-NEXT: andi a1, s7, 4
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 8
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: andi a1, s7, 16
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 32
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 64
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: andi a1, s7, 128
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a1, s7, 256
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 512
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s7, 1024
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: and a1, s7, s11
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: xor s10, s10, a0
-; RV32I-NEXT: lui a0, 131072
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 524288
-; RV32I-NEXT: and a1, s7, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s1, a0
-; RV32I-NEXT: lw a3, 136(sp) # 4-byte Folded Reload
-; RV32I-NEXT: srli a1, a3, 8
-; RV32I-NEXT: xor a0, s10, a0
-; RV32I-NEXT: srli a2, a3, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: and a2, a3, s6
-; RV32I-NEXT: slli a3, a3, 24
-; RV32I-NEXT: slli a2, a2, 8
; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: srli a3, a0, 8
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: srli a2, a0, 24
+; RV32I-NEXT: and a3, a4, s2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: srli a2, a2, 1
+; RV32I-NEXT: srli a3, a3, 1
+; RV32I-NEXT: lw a4, 720(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a2, a2, a4
+; RV32I-NEXT: lw a4, 716(sp) # 4-byte Folded Reload
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: srli a4, a2, 8
+; RV32I-NEXT: srli a5, a2, 24
+; RV32I-NEXT: slli a6, a2, 24
+; RV32I-NEXT: and a2, a2, t0
+; RV32I-NEXT: and a4, a4, t0
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: srli a5, a3, 8
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: or a2, a6, a2
+; RV32I-NEXT: srli a6, a3, 24
+; RV32I-NEXT: and a5, a5, t0
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: and a6, a3, t0
+; RV32I-NEXT: slli a3, a3, 24
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a3, a3, a6
+; RV32I-NEXT: or a2, a2, a4
+; RV32I-NEXT: or a3, a3, a5
+; RV32I-NEXT: srli a4, a2, 4
+; RV32I-NEXT: and a2, a2, s3
+; RV32I-NEXT: srli a5, a3, 4
+; RV32I-NEXT: and a3, a3, s3
+; RV32I-NEXT: and a4, a4, s3
+; RV32I-NEXT: and a5, a5, s3
+; RV32I-NEXT: slli a2, a2, 4
+; RV32I-NEXT: slli a3, a3, 4
+; RV32I-NEXT: or a2, a4, a2
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: srli a4, a2, 2
+; RV32I-NEXT: and a2, a2, s6
+; RV32I-NEXT: srli a5, a3, 2
; RV32I-NEXT: and a3, a3, s6
-; RV32I-NEXT: or a2, a3, a2
-; RV32I-NEXT: slli a3, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: srli a3, a1, 4
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: and a2, a3, s5
-; RV32I-NEXT: slli a1, a1, 4
-; RV32I-NEXT: srli a3, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: and a2, a3, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: srli a3, a1, 2
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: and a2, a3, s4
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: srli a3, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: and a2, a3, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: and a2, a3, s3
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srli a3, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: or s11, a2, a1
-; RV32I-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, a3, a1
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: andi a1, s9, 2
-; RV32I-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s9, 1
-; RV32I-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s1, a0, s0
-; RV32I-NEXT: andi a1, s9, 4
-; RV32I-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s9, 8
-; RV32I-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s9, 16
-; RV32I-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s9, 32
-; RV32I-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s9, 64
-; RV32I-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: andi a1, s9, 128
-; RV32I-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: andi a1, s9, 256
-; RV32I-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s9, 512
-; RV32I-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s9, 1024
-; RV32I-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s1, s1, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor s7, s1, a0
-; RV32I-NEXT: lui s1, 131072
-; RV32I-NEXT: and a1, s9, s1
-; RV32I-NEXT: sw a1, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s10, a0
-; RV32I-NEXT: lui s10, 524288
-; RV32I-NEXT: and a1, s9, s10
-; RV32I-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s11
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: lw a2, 268(sp) # 4-byte Folded Reload
-; RV32I-NEXT: srli a1, a2, 8
-; RV32I-NEXT: xor s7, s7, a0
-; RV32I-NEXT: srli a0, a2, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: and a1, a2, s6
-; RV32I-NEXT: slli a2, a2, 24
-; RV32I-NEXT: slli a1, a1, 8
-; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or s9, a1, a0
-; RV32I-NEXT: andi a1, s9, 2
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: andi a1, s9, 1
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, a0, s11
-; RV32I-NEXT: andi a1, s9, 4
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: andi a1, s9, 8
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s9, 16
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: andi a1, s9, 32
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: andi a1, s9, 64
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: andi a1, s9, 128
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: andi a1, s9, 256
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: andi a1, s9, 512
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: andi a1, s9, 1024
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 4
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: lui a0, 32
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 64
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 128
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 256
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 512
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: lui a0, 1024
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: lui a0, 2048
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 4096
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 8192
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 16384
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 32768
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: lui a0, 65536
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: and a1, s9, s1
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: lui a0, 262144
-; RV32I-NEXT: and a1, s9, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s11, s11, a0
-; RV32I-NEXT: and a1, s9, s10
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor a0, a0, s7
-; RV32I-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: xor a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: slli a3, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or s7, a1, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 264(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, a0, s11
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 256(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s11, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s9, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s9, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s9, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 188(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 180(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s9, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 176(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 172(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 168(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 164(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 160(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 156(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s9, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 152(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s9, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 148(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s9, s11, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw a1, 140(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s9, a0
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: slli a2, a0, 24
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: slli a0, a0, 8
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or s9, a1, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, a0, s2
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 84(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s2, s2, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: xor s2, s0, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor s0, s0, a0
-; RV32I-NEXT: mv a0, s8
-; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: xor a0, s0, a0
-; RV32I-NEXT: xor a0, s2, a0
-; RV32I-NEXT: srli a1, a0, 8
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: and a1, a1, s6
-; RV32I-NEXT: and a3, a0, s6
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: and a1, a1, s5
-; RV32I-NEXT: slli a0, a0, 4
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: and a1, a1, s4
-; RV32I-NEXT: slli a0, a0, 2
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: slli a0, a0, 1
-; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: lw a1, 272(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw a2, 132(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a2, 0(a1)
-; RV32I-NEXT: sw s9, 4(a1)
-; RV32I-NEXT: sw s7, 8(a1)
-; RV32I-NEXT: sw a0, 12(a1)
-; RV32I-NEXT: lw a1, 276(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a2, 0(a1)
-; RV32I-NEXT: sw s9, 4(a1)
-; RV32I-NEXT: sw s7, 8(a1)
-; RV32I-NEXT: sw a0, 12(a1)
-; RV32I-NEXT: lw ra, 332(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 328(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 324(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 320(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 316(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 312(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 308(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 304(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 300(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 296(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s9, 292(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s10, 288(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s11, 284(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 336
+; RV32I-NEXT: and a4, a4, s6
+; RV32I-NEXT: and a5, a5, s6
+; RV32I-NEXT: slli a2, a2, 2
+; RV32I-NEXT: slli a3, a3, 2
+; RV32I-NEXT: or a2, a4, a2
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: srli a4, a2, 1
+; RV32I-NEXT: and a2, a2, s2
+; RV32I-NEXT: srli a5, a3, 1
+; RV32I-NEXT: and a3, a3, s2
+; RV32I-NEXT: and a4, a4, s2
+; RV32I-NEXT: and a5, a5, s2
+; RV32I-NEXT: slli a2, a2, 1
+; RV32I-NEXT: or a2, a4, a2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: lw a4, 732(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a2, 0(a4)
+; RV32I-NEXT: sw a1, 4(a4)
+; RV32I-NEXT: sw a3, 8(a4)
+; RV32I-NEXT: sw a0, 12(a4)
+; RV32I-NEXT: lw a4, 736(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a2, 0(a4)
+; RV32I-NEXT: sw a1, 4(a4)
+; RV32I-NEXT: sw a3, 8(a4)
+; RV32I-NEXT: sw a0, 12(a4)
+; RV32I-NEXT: lw ra, 796(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 792(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 788(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 784(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 780(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 776(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 772(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 768(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 764(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 760(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 756(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 752(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 748(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 800
; RV32I-NEXT: ret
;
; RV64I-LABEL: commutative_clmulr_v2i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -400
-; RV64I-NEXT: sd ra, 392(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 384(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 376(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 368(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 360(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 352(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 344(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 336(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 328(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 320(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 312(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s10, 304(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s11, 296(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a5, 288(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a4, 280(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s3, a3
-; RV64I-NEXT: mv s2, a1
-; RV64I-NEXT: srli a1, a2, 24
-; RV64I-NEXT: lui t5, 4080
-; RV64I-NEXT: srli a3, a2, 8
-; RV64I-NEXT: li s11, 255
-; RV64I-NEXT: srli a4, a2, 40
-; RV64I-NEXT: lui t6, 16
-; RV64I-NEXT: srli a5, a2, 56
-; RV64I-NEXT: srliw a6, a2, 24
-; RV64I-NEXT: lui a7, 61681
-; RV64I-NEXT: lui t0, 209715
-; RV64I-NEXT: lui t1, 349525
-; RV64I-NEXT: srli t2, a0, 24
+; RV64I-NEXT: addi sp, sp, -1120
+; RV64I-NEXT: sd ra, 1112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 1104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 1096(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 1088(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 1080(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 1072(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 1064(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 1056(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 1048(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 1040(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 1032(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 1024(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 1016(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a5, 1008(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a4, 1000(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s5, a0
+; RV64I-NEXT: srli a5, a2, 24
+; RV64I-NEXT: lui s4, 4080
+; RV64I-NEXT: srli a6, a2, 8
+; RV64I-NEXT: li t4, 255
+; RV64I-NEXT: srli a7, a2, 40
+; RV64I-NEXT: lui s0, 16
+; RV64I-NEXT: srli t0, a2, 56
+; RV64I-NEXT: srliw t1, a2, 24
+; RV64I-NEXT: slli a4, a2, 56
+; RV64I-NEXT: lui t2, 61681
+; RV64I-NEXT: lui ra, 209715
+; RV64I-NEXT: lui s3, 349525
+; RV64I-NEXT: srli s6, a0, 24
; RV64I-NEXT: srli t3, a0, 8
-; RV64I-NEXT: srli t4, a0, 40
-; RV64I-NEXT: and a1, a1, t5
-; RV64I-NEXT: slli s11, s11, 24
-; RV64I-NEXT: and a3, a3, s11
-; RV64I-NEXT: or a1, a3, a1
-; RV64I-NEXT: addi s10, t6, -256
-; RV64I-NEXT: and a3, a4, s10
-; RV64I-NEXT: or a3, a3, a5
-; RV64I-NEXT: and a4, a2, t5
-; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: addi s9, a7, -241
-; RV64I-NEXT: addi s8, t0, 819
-; RV64I-NEXT: addi s7, t1, 1365
-; RV64I-NEXT: slli a4, a4, 24
-; RV64I-NEXT: or a4, a4, a6
-; RV64I-NEXT: slli a5, s9, 32
-; RV64I-NEXT: add s9, s9, a5
-; RV64I-NEXT: slli a5, s8, 32
-; RV64I-NEXT: add s8, s8, a5
-; RV64I-NEXT: slli a5, s7, 32
-; RV64I-NEXT: add s7, s7, a5
+; RV64I-NEXT: srli t5, a0, 40
+; RV64I-NEXT: srli t6, a0, 56
+; RV64I-NEXT: srliw s1, a0, 24
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: sd a0, 952(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srli s8, a3, 24
+; RV64I-NEXT: srli s11, a3, 8
+; RV64I-NEXT: srli s7, a3, 40
+; RV64I-NEXT: srli s10, a3, 56
+; RV64I-NEXT: and a5, a5, s4
+; RV64I-NEXT: slli s2, t4, 24
+; RV64I-NEXT: and a6, a6, s2
+; RV64I-NEXT: or a6, a6, a5
+; RV64I-NEXT: addi a0, s0, -256
+; RV64I-NEXT: and a5, a7, a0
+; RV64I-NEXT: or a7, a5, t0
+; RV64I-NEXT: and a5, a2, s4
+; RV64I-NEXT: slli t1, t1, 32
+; RV64I-NEXT: addi s9, t2, -241
+; RV64I-NEXT: addi ra, ra, 819
+; RV64I-NEXT: addi t0, s3, 1365
+; RV64I-NEXT: slli a5, a5, 24
+; RV64I-NEXT: or a5, a5, t1
+; RV64I-NEXT: slli t1, s9, 32
+; RV64I-NEXT: add s9, s9, t1
+; RV64I-NEXT: slli t1, ra, 32
+; RV64I-NEXT: add ra, ra, t1
+; RV64I-NEXT: slli t1, t0, 32
+; RV64I-NEXT: add s0, t0, t1
+; RV64I-NEXT: srliw t1, a3, 24
+; RV64I-NEXT: and t0, s6, s4
+; RV64I-NEXT: and t2, t3, s2
+; RV64I-NEXT: or t0, t2, t0
+; RV64I-NEXT: srli t2, a1, 24
+; RV64I-NEXT: and t3, t5, a0
+; RV64I-NEXT: or t3, t3, t6
+; RV64I-NEXT: and t4, s5, s4
+; RV64I-NEXT: slli s1, s1, 32
+; RV64I-NEXT: slli t4, t4, 24
+; RV64I-NEXT: or t4, t4, s1
+; RV64I-NEXT: srli t5, a1, 8
+; RV64I-NEXT: and t6, s8, s4
+; RV64I-NEXT: mv s6, s2
+; RV64I-NEXT: sd s2, 992(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and s1, s11, s2
+; RV64I-NEXT: or t6, s1, t6
+; RV64I-NEXT: srli s1, a1, 40
+; RV64I-NEXT: and s2, s7, a0
+; RV64I-NEXT: or s2, s2, s10
+; RV64I-NEXT: and s3, a3, s4
+; RV64I-NEXT: slli t1, t1, 32
+; RV64I-NEXT: slli s3, s3, 24
+; RV64I-NEXT: or t1, s3, t1
+; RV64I-NEXT: srli s3, a1, 56
+; RV64I-NEXT: and t2, t2, s4
+; RV64I-NEXT: and t5, t5, s6
+; RV64I-NEXT: or t2, t5, t2
+; RV64I-NEXT: srliw t5, a1, 24
+; RV64I-NEXT: and s1, s1, a0
+; RV64I-NEXT: or s1, s1, s3
+; RV64I-NEXT: and s3, a1, s4
+; RV64I-NEXT: slli t5, t5, 32
+; RV64I-NEXT: slli s3, s3, 24
+; RV64I-NEXT: or s3, s3, t5
+; RV64I-NEXT: li t5, 1
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: slli a7, a3, 56
+; RV64I-NEXT: mv s4, a0
+; RV64I-NEXT: sd a0, 976(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a2, a0
+; RV64I-NEXT: slli a2, a2, 40
+; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: slli a4, a1, 56
+; RV64I-NEXT: or t0, t0, t3
+; RV64I-NEXT: slli s6, t5, 11
+; RV64I-NEXT: sd s6, 440(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a0, s5, a0
+; RV64I-NEXT: and a3, a3, s4
+; RV64I-NEXT: and a1, a1, s4
+; RV64I-NEXT: slli a0, a0, 40
+; RV64I-NEXT: slli a3, a3, 40
+; RV64I-NEXT: slli a1, a1, 40
+; RV64I-NEXT: ld t3, 952(sp) # 8-byte Folded Reload
+; RV64I-NEXT: or a0, t3, a0
+; RV64I-NEXT: or t3, t6, s2
+; RV64I-NEXT: or a3, a7, a3
+; RV64I-NEXT: or a7, t2, s1
+; RV64I-NEXT: or a1, a4, a1
+; RV64I-NEXT: or a2, a2, a5
+; RV64I-NEXT: or a0, a0, t4
+; RV64I-NEXT: or a3, a3, t1
+; RV64I-NEXT: or a1, a1, s3
+; RV64I-NEXT: or a2, a2, a6
+; RV64I-NEXT: or a0, a0, t0
+; RV64I-NEXT: or a3, a3, t3
+; RV64I-NEXT: or a1, a1, a7
+; RV64I-NEXT: srli a4, a2, 4
+; RV64I-NEXT: sd s9, 984(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a2, s9
+; RV64I-NEXT: srli a5, a0, 4
+; RV64I-NEXT: and a0, a0, s9
+; RV64I-NEXT: srli a6, a3, 4
+; RV64I-NEXT: and a3, a3, s9
+; RV64I-NEXT: srli a7, a1, 4
+; RV64I-NEXT: and a1, a1, s9
+; RV64I-NEXT: and a4, a4, s9
+; RV64I-NEXT: slli a2, a2, 4
+; RV64I-NEXT: and a5, a5, s9
+; RV64I-NEXT: slli a0, a0, 4
+; RV64I-NEXT: and a6, a6, s9
+; RV64I-NEXT: slli a3, a3, 4
+; RV64I-NEXT: and a7, a7, s9
+; RV64I-NEXT: slli a1, a1, 4
+; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: or a0, a5, a0
+; RV64I-NEXT: or a3, a6, a3
+; RV64I-NEXT: or a1, a7, a1
+; RV64I-NEXT: srli a4, a2, 2
+; RV64I-NEXT: sd ra, 968(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a2, ra
+; RV64I-NEXT: srli a5, a0, 2
+; RV64I-NEXT: and a0, a0, ra
+; RV64I-NEXT: srli a6, a3, 2
+; RV64I-NEXT: and a3, a3, ra
+; RV64I-NEXT: srli a7, a1, 2
+; RV64I-NEXT: and a1, a1, ra
+; RV64I-NEXT: and a4, a4, ra
+; RV64I-NEXT: slli a2, a2, 2
+; RV64I-NEXT: and a5, a5, ra
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: and a6, a6, ra
+; RV64I-NEXT: slli a3, a3, 2
+; RV64I-NEXT: and a7, a7, ra
+; RV64I-NEXT: slli a1, a1, 2
+; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: or a0, a5, a0
+; RV64I-NEXT: or a3, a6, a3
+; RV64I-NEXT: or a1, a7, a1
+; RV64I-NEXT: srli a4, a2, 1
+; RV64I-NEXT: sd s0, 960(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, a2, s0
+; RV64I-NEXT: srli a5, a0, 1
+; RV64I-NEXT: and a0, a0, s0
+; RV64I-NEXT: srli a6, a3, 1
+; RV64I-NEXT: and a3, a3, s0
+; RV64I-NEXT: srli a7, a1, 1
+; RV64I-NEXT: and a1, a1, s0
+; RV64I-NEXT: and a4, a4, s0
+; RV64I-NEXT: slli a2, a2, 1
+; RV64I-NEXT: and a5, a5, s0
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: and a6, a6, s0
+; RV64I-NEXT: slli t0, a3, 1
+; RV64I-NEXT: and a7, a7, s0
+; RV64I-NEXT: slli a3, a1, 1
+; RV64I-NEXT: or s0, a4, a2
+; RV64I-NEXT: or s1, a5, a0
+; RV64I-NEXT: srli a2, a0, 63
+; RV64I-NEXT: or a1, a6, t0
+; RV64I-NEXT: or t0, a7, a3
+; RV64I-NEXT: srli a3, a3, 63
+; RV64I-NEXT: slli a4, s0, 1
+; RV64I-NEXT: andi a5, s1, 2
+; RV64I-NEXT: slli a6, s0, 2
+; RV64I-NEXT: andi a7, s1, 4
+; RV64I-NEXT: slli a0, s0, 3
+; RV64I-NEXT: andi t1, s1, 8
+; RV64I-NEXT: slli t2, s0, 4
+; RV64I-NEXT: andi t3, s1, 16
+; RV64I-NEXT: slli t4, s0, 5
+; RV64I-NEXT: andi t6, s1, 32
+; RV64I-NEXT: slli s2, s0, 63
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a2, a2, s2
+; RV64I-NEXT: sd a2, 944(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, a1, 63
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 952(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, s0, 6
+; RV64I-NEXT: seqz a3, a5
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: sd a3, 456(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a3, s1, 64
+; RV64I-NEXT: seqz a4, a7
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, a6
+; RV64I-NEXT: sd a4, 448(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a4, s0, 7
+; RV64I-NEXT: seqz a5, t1
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: and a0, a5, a0
+; RV64I-NEXT: sd a0, 912(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a5, s1, 128
+; RV64I-NEXT: seqz a6, t3
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: and a0, a6, t2
+; RV64I-NEXT: sd a0, 480(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a6, s0, 8
+; RV64I-NEXT: seqz a7, t6
+; RV64I-NEXT: addi a7, a7, -1
+; RV64I-NEXT: and a0, a7, t4
+; RV64I-NEXT: sd a0, 472(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a7, s1, 256
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 928(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, s0, 9
+; RV64I-NEXT: seqz a3, a5
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: sd a3, 464(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a3, s1, 512
+; RV64I-NEXT: seqz a4, a7
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a0, a4, a6
+; RV64I-NEXT: sd a0, 880(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a4, s0, 10
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: sd a2, 904(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a2, s1, 1024
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: sd a2, 936(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, s6
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 11
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 856(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 1
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 12
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 848(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 2
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 13
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 872(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 4
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 14
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 896(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 8
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 15
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 920(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 16
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 808(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 32
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 17
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 792(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 64
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 18
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 824(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 128
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 19
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 840(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 256
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 20
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 864(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 512
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 21
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 888(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 1024
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 22
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 744(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 2048
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 23
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 736(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 4096
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 24
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 768(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 8192
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 25
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 784(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 16384
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 26
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 800(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 32768
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 27
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 816(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 65536
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 28
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 832(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 131072
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 29
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 680(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a2, 262144
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 30
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 664(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sraiw a2, s1, 31
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 31
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 704(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 32
+; RV64I-NEXT: sd a2, 432(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 32
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 720(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 33
+; RV64I-NEXT: sd a2, 424(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 33
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 728(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 34
+; RV64I-NEXT: sd a2, 416(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 34
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 752(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 35
+; RV64I-NEXT: sd a2, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 35
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 760(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 36
+; RV64I-NEXT: sd a2, 400(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 36
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 776(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 37
+; RV64I-NEXT: sd a2, 384(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 37
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 608(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 38
+; RV64I-NEXT: sd a2, 376(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 38
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 592(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 39
+; RV64I-NEXT: sd a2, 368(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 39
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 640(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 40
+; RV64I-NEXT: sd a2, 352(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 40
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 648(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 41
+; RV64I-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, s1, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 41
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 656(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli ra, t5, 42
+; RV64I-NEXT: and a2, s1, ra
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, s0, 42
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 672(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a3, t5, 43
+; RV64I-NEXT: and a2, s1, a3
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a4, s0, 43
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: sd a2, 688(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a2, t5, 44
+; RV64I-NEXT: and a4, s1, a2
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 44
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 696(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s10, t5, 45
+; RV64I-NEXT: and a4, s1, s10
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 45
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 712(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s9, t5, 46
+; RV64I-NEXT: and a4, s1, s9
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 46
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 552(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s8, t5, 47
+; RV64I-NEXT: and a4, s1, s8
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 47
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 544(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s7, t5, 48
+; RV64I-NEXT: and a4, s1, s7
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 48
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 560(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s6, t5, 49
+; RV64I-NEXT: and a4, s1, s6
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 49
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 568(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s5, t5, 50
+; RV64I-NEXT: and a4, s1, s5
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 50
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 576(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s4, t5, 51
+; RV64I-NEXT: and a4, s1, s4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 51
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 584(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s3, t5, 52
+; RV64I-NEXT: and a4, s1, s3
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 52
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 600(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a7, t5, 53
+; RV64I-NEXT: and a4, s1, a7
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 53
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 616(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a6, t5, 54
+; RV64I-NEXT: and a4, s1, a6
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 54
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 632(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, t5, 55
+; RV64I-NEXT: and a4, s1, a0
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 55
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 624(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s2, t5, 56
+; RV64I-NEXT: and a4, s1, s2
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 56
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 496(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t6, t5, 57
+; RV64I-NEXT: and a4, s1, t6
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 57
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 488(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t4, t5, 58
+; RV64I-NEXT: and a4, s1, t4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 58
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 504(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t3, t5, 59
+; RV64I-NEXT: and a4, s1, t3
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 59
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 512(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t2, t5, 60
+; RV64I-NEXT: and a4, s1, t2
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 60
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 520(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli s11, t5, 61
+; RV64I-NEXT: and a4, s1, s11
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, s0, 61
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 528(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli t1, t5, 62
+; RV64I-NEXT: andi a4, s1, 1
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and t5, a4, s0
+; RV64I-NEXT: slli s0, s0, 62
+; RV64I-NEXT: and a4, s1, t1
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a4, a4, s0
+; RV64I-NEXT: sd a4, 536(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 2
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 1
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 2
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 344(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 8
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 3
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 320(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 16
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 312(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 32
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 5
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 296(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 64
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 6
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 360(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 128
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 7
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 280(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 256
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 8
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 512
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 9
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 304(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a4, t0, 1024
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 10
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 336(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 440(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 11
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 216(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 13
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 288(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 8
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 15
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 440(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 16
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 17
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 176(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 64
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 200(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 128
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 19
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 232(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 256
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 20
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 512
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 21
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 272(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 1024
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 22
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 2048
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 4096
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 24
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 160(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 8192
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 25
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 168(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 16384
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 26
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 192(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 32768
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 27
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 208(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 65536
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 28
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 224(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 131072
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 29
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a4, 262144
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli s1, a1, 30
+; RV64I-NEXT: and a4, a4, s1
+; RV64I-NEXT: sd a4, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sraiw a4, t0, 31
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 31
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 432(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 32
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 424(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 33
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 416(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 34
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 416(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 35
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 424(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 400(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 36
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 432(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 384(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 37
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 376(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 38
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 368(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 39
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 352(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli s0, a1, 40
+; RV64I-NEXT: and a4, a4, s0
+; RV64I-NEXT: sd a4, 352(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a4, 328(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a4, t0, a4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 41
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 368(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a4, t0, ra
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 42
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: sd a4, 376(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a3, t0, a3
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: slli a4, a1, 43
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: sd a3, 384(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 44
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 400(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s10
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 45
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s9
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 46
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s8
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a5, a1, 47
+; RV64I-NEXT: and s10, a2, a5
+; RV64I-NEXT: and a2, t0, s7
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 48
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s6
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 49
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s5
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 50
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s4
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 51
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s3
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 52
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, a7
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 53
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, a6
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 54
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, a0
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 55
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sd a2, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: and a2, t0, s2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: slli a3, a1, 56
+; RV64I-NEXT: and s6, a2, a3
+; RV64I-NEXT: and a3, t0, t6
+; RV64I-NEXT: seqz a3, a3
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: slli a4, a1, 57
+; RV64I-NEXT: and s5, a3, a4
+; RV64I-NEXT: and a4, t0, t4
+; RV64I-NEXT: seqz a4, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: slli a5, a1, 58
+; RV64I-NEXT: and s7, a4, a5
+; RV64I-NEXT: and a5, t0, t3
+; RV64I-NEXT: seqz a5, a5
+; RV64I-NEXT: addi a5, a5, -1
+; RV64I-NEXT: slli a6, a1, 59
+; RV64I-NEXT: and s8, a5, a6
+; RV64I-NEXT: and a6, t0, t2
+; RV64I-NEXT: seqz a6, a6
+; RV64I-NEXT: addi a6, a6, -1
+; RV64I-NEXT: slli a0, a1, 60
+; RV64I-NEXT: and s9, a6, a0
+; RV64I-NEXT: and a0, t0, s11
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: slli s11, a1, 61
+; RV64I-NEXT: and s11, a0, s11
+; RV64I-NEXT: and a0, t0, t1
+; RV64I-NEXT: andi t0, t0, 1
+; RV64I-NEXT: seqz t0, t0
+; RV64I-NEXT: addi t0, t0, -1
+; RV64I-NEXT: and t0, t0, a1
+; RV64I-NEXT: slli a1, a1, 62
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: ld a0, 456(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s4, t5, a0
+; RV64I-NEXT: ld a0, 912(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, a1, a0
+; RV64I-NEXT: ld a0, 480(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, a0, a1
+; RV64I-NEXT: ld a0, 880(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 464(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, a1, a0
+; RV64I-NEXT: ld a0, 856(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 848(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, a0, a1
+; RV64I-NEXT: ld a0, 808(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 792(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, a0, s0
+; RV64I-NEXT: ld a0, 744(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 736(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, a0, s1
+; RV64I-NEXT: ld a0, 680(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 664(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s2, a0, a1
+; RV64I-NEXT: ld a0, 608(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 592(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s3, a0, a1
+; RV64I-NEXT: ld a0, 552(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 544(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, a0, a1
+; RV64I-NEXT: ld a0, 496(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 488(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, a0, a1
+; RV64I-NEXT: ld a0, 392(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t0, a0
+; RV64I-NEXT: ld a0, 344(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 296(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: ld a2, 280(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a3, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a2, a3
+; RV64I-NEXT: ld a3, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a4, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a3, a4
+; RV64I-NEXT: ld a4, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a4, a5
+; RV64I-NEXT: ld a5, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a6, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a5, a6
+; RV64I-NEXT: ld a6, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a7, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a6, a7
+; RV64I-NEXT: ld a7, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld ra, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, ra
+; RV64I-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s10, ra, s10
+; RV64I-NEXT: xor s5, s6, s5
+; RV64I-NEXT: xor t3, s4, t3
+; RV64I-NEXT: ld s4, 928(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t4, s4
+; RV64I-NEXT: ld s4, 904(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t5, s4
+; RV64I-NEXT: ld s4, 872(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, t6, s4
+; RV64I-NEXT: ld s4, 824(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, s0, s4
+; RV64I-NEXT: ld s4, 768(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, s1, s4
+; RV64I-NEXT: ld s4, 704(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s2, s2, s4
+; RV64I-NEXT: ld s4, 640(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s3, s3, s4
+; RV64I-NEXT: ld s4, 560(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, s4
+; RV64I-NEXT: ld s4, 504(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, s4
+; RV64I-NEXT: xor a0, t0, a0
+; RV64I-NEXT: ld t0, 360(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a1, t0
+; RV64I-NEXT: ld t0, 304(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a2, t0
+; RV64I-NEXT: ld t0, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a3, t0
+; RV64I-NEXT: ld t0, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a4, t0
+; RV64I-NEXT: ld t0, 160(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a5, t0
+; RV64I-NEXT: ld t0, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a6, t0
+; RV64I-NEXT: ld t0, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, a7, t0
+; RV64I-NEXT: ld t0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, s10, t0
+; RV64I-NEXT: xor s4, s5, s7
+; RV64I-NEXT: xor t3, t3, t4
+; RV64I-NEXT: ld t4, 936(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t5, t4
+; RV64I-NEXT: ld t5, 896(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t6, t5
+; RV64I-NEXT: ld t6, 840(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, s0, t6
+; RV64I-NEXT: ld s0, 784(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, s1, s0
+; RV64I-NEXT: ld s1, 720(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, s2, s1
+; RV64I-NEXT: ld s2, 648(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s2, s3, s2
+; RV64I-NEXT: ld s3, 568(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, s3
+; RV64I-NEXT: ld s3, 512(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, s3
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 288(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 232(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: ld a6, 352(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a7, a6
+; RV64I-NEXT: ld a7, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, t0, a7
+; RV64I-NEXT: xor t0, s4, s8
+; RV64I-NEXT: xor t3, t3, t4
+; RV64I-NEXT: ld t4, 920(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t5, t4
+; RV64I-NEXT: ld t5, 864(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, t6, t5
+; RV64I-NEXT: ld t6, 800(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, s0, t6
+; RV64I-NEXT: ld s0, 728(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s0, s1, s0
+; RV64I-NEXT: ld s1, 656(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor s1, s2, s1
+; RV64I-NEXT: ld s2, 576(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, s2
+; RV64I-NEXT: ld s2, 520(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, s2
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 368(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: ld a6, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, a7, a6
+; RV64I-NEXT: xor a7, t0, s9
+; RV64I-NEXT: xor t0, t3, t4
+; RV64I-NEXT: ld t3, 888(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t5, t3
+; RV64I-NEXT: ld t4, 816(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t6, t4
+; RV64I-NEXT: ld t5, 752(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t5, s0, t5
+; RV64I-NEXT: ld t6, 672(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t6, s1, t6
+; RV64I-NEXT: ld s0, 584(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, s0
+; RV64I-NEXT: ld s0, 528(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, s0
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 416(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 376(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: xor a6, a7, s11
+; RV64I-NEXT: xor a7, t0, t3
+; RV64I-NEXT: ld t0, 832(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t4, t0
+; RV64I-NEXT: ld t3, 760(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t3, t5, t3
+; RV64I-NEXT: ld t4, 688(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t4, t6, t4
+; RV64I-NEXT: ld t5, 600(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, t5
+; RV64I-NEXT: ld t5, 536(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, t5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 424(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 384(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: ld a5, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a6, a5
+; RV64I-NEXT: xor a6, a7, t0
+; RV64I-NEXT: ld a7, 776(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, t3, a7
+; RV64I-NEXT: ld t0, 696(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t0, t4, t0
+; RV64I-NEXT: ld t3, 616(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t1, t1, t3
+; RV64I-NEXT: ld t3, 944(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor t2, t2, t3
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 400(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: ld a3, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: ld a4, 952(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: xor a5, a6, a7
+; RV64I-NEXT: ld a6, 712(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a6, t0, a6
+; RV64I-NEXT: ld a7, 632(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a7, t1, a7
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: ld a2, 328(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a2, a3, a2
+; RV64I-NEXT: xor a3, a5, a6
+; RV64I-NEXT: ld a5, 624(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a5, a7, a5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: xor a3, a3, a5
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: xor a1, a3, t2
+; RV64I-NEXT: xor a0, a0, a4
+; RV64I-NEXT: srli a2, a1, 40
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 24
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: srli a6, a0, 8
+; RV64I-NEXT: ld a7, 992(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a5, a5, a7
+; RV64I-NEXT: and a6, a6, a7
+; RV64I-NEXT: srliw a7, a1, 24
+; RV64I-NEXT: ld t1, 976(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a2, a2, t1
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: lui t0, 4080
+; RV64I-NEXT: and a3, a1, t0
+; RV64I-NEXT: and a4, a4, t0
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: slli a5, a1, 56
+; RV64I-NEXT: and a1, a1, t1
+; RV64I-NEXT: slli a7, a7, 32
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: or a3, a3, a7
+; RV64I-NEXT: srli a7, a0, 40
+; RV64I-NEXT: slli a1, a1, 40
+; RV64I-NEXT: or a1, a5, a1
; RV64I-NEXT: srli a5, a0, 56
-; RV64I-NEXT: and a6, t2, t5
-; RV64I-NEXT: and a7, t3, s11
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: and a7, a7, t1
+; RV64I-NEXT: or a5, a7, a5
+; RV64I-NEXT: srli a7, a0, 24
+; RV64I-NEXT: and a7, a7, t0
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: srliw a7, a0, 24
-; RV64I-NEXT: and t0, t4, s10
-; RV64I-NEXT: or a5, t0, a5
-; RV64I-NEXT: and t0, a0, t5
+; RV64I-NEXT: and t0, a0, t0
; RV64I-NEXT: slli a7, a7, 32
; RV64I-NEXT: slli t0, t0, 24
; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: and t0, a0, t1
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: slli t0, t0, 40
+; RV64I-NEXT: or a0, a0, t0
+; RV64I-NEXT: or a2, a4, a2
; RV64I-NEXT: or a1, a1, a3
-; RV64I-NEXT: slli a3, a2, 56
-; RV64I-NEXT: and a2, a2, s10
-; RV64I-NEXT: slli a2, a2, 40
-; RV64I-NEXT: or a2, a3, a2
; RV64I-NEXT: or a3, a6, a5
-; RV64I-NEXT: slli a5, a0, 56
-; RV64I-NEXT: and a0, a0, s10
-; RV64I-NEXT: slli a0, a0, 40
-; RV64I-NEXT: or a0, a5, a0
-; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: or a0, a0, a7
-; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: or a1, a1, a2
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: srli a2, a1, 4
-; RV64I-NEXT: and a1, a1, s9
+; RV64I-NEXT: ld a4, 984(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a1, a1, a4
; RV64I-NEXT: srli a3, a0, 4
-; RV64I-NEXT: and a0, a0, s9
-; RV64I-NEXT: and a2, a2, s9
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: and a3, a3, a4
; RV64I-NEXT: slli a1, a1, 4
-; RV64I-NEXT: and a3, a3, s9
; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: srli a2, a1, 2
-; RV64I-NEXT: and a1, a1, s8
+; RV64I-NEXT: ld a4, 968(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a1, a1, a4
; RV64I-NEXT: srli a3, a0, 2
-; RV64I-NEXT: and a0, a0, s8
-; RV64I-NEXT: and a2, a2, s8
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: and a3, a3, a4
; RV64I-NEXT: slli a1, a1, 2
-; RV64I-NEXT: and a3, a3, s8
; RV64I-NEXT: slli a0, a0, 2
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: srli a2, a1, 1
-; RV64I-NEXT: and a1, a1, s7
+; RV64I-NEXT: ld a4, 960(sp) # 8-byte Folded Reload
+; RV64I-NEXT: and a1, a1, a4
; RV64I-NEXT: srli a3, a0, 1
-; RV64I-NEXT: and a0, a0, s7
-; RV64I-NEXT: and a2, a2, s7
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: and a3, a3, a4
; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: and a3, a3, s7
-; RV64I-NEXT: slli a0, a0, 1
-; RV64I-NEXT: or s4, a2, a1
-; RV64I-NEXT: or s0, a3, a0
-; RV64I-NEXT: andi a1, s0, 2
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s0, 1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, a0, s5
-; RV64I-NEXT: andi a1, s0, 4
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s0, 8
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: andi a1, s0, 16
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s0, 32
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: andi a1, s0, 64
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: xor s6, s1, a0
-; RV64I-NEXT: andi a1, s0, 128
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: andi a1, s0, 256
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s5, a0
-; RV64I-NEXT: andi a1, s0, 512
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s1, s1, a0
-; RV64I-NEXT: andi a1, s0, 1024
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s1, a0
-; RV64I-NEXT: li s1, 1
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: slli a1, s1, 11
-; RV64I-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a1, 32
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 64
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 128
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 256
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 512
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: lui a1, 1024
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a1, 2048
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 4096
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 8192
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 16384
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 32768
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 65536
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: lui a1, 131072
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: lui a1, 262144
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 31
-; RV64I-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 32
-; RV64I-NEXT: sd a1, 256(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 33
-; RV64I-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 34
-; RV64I-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 35
-; RV64I-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 36
-; RV64I-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: slli a1, s1, 37
-; RV64I-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: slli a1, s1, 38
-; RV64I-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 39
-; RV64I-NEXT: sd a1, 200(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 40
-; RV64I-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 41
-; RV64I-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 42
-; RV64I-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 43
-; RV64I-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 44
-; RV64I-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 45
-; RV64I-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: slli a1, s1, 46
-; RV64I-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: slli a1, s1, 47
-; RV64I-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 48
-; RV64I-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 49
-; RV64I-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 50
-; RV64I-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 51
-; RV64I-NEXT: sd a1, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 52
-; RV64I-NEXT: sd a1, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 53
-; RV64I-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 54
-; RV64I-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 55
-; RV64I-NEXT: sd a1, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: slli a1, s1, 56
-; RV64I-NEXT: sd a1, 64(sp) # 8-byte Folded Spill
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: slli a1, s1, 57
-; RV64I-NEXT: sd a1, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 58
-; RV64I-NEXT: sd a1, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 59
-; RV64I-NEXT: sd a1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 60
-; RV64I-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 61
-; RV64I-NEXT: sd a1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: slli a1, s1, 62
-; RV64I-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: li s1, -1
-; RV64I-NEXT: slli a1, s1, 63
-; RV64I-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s5, a0
-; RV64I-NEXT: srli a1, s3, 24
-; RV64I-NEXT: srli a2, s3, 8
-; RV64I-NEXT: srli a3, s3, 40
-; RV64I-NEXT: srli a4, s3, 56
-; RV64I-NEXT: lui t0, 4080
-; RV64I-NEXT: and a5, s3, t0
-; RV64I-NEXT: srliw a6, s3, 24
-; RV64I-NEXT: xor a0, s6, a0
-; RV64I-NEXT: and a7, s3, s10
-; RV64I-NEXT: slli s3, s3, 56
-; RV64I-NEXT: and a1, a1, t0
-; RV64I-NEXT: lui s1, 4080
-; RV64I-NEXT: and a2, a2, s11
; RV64I-NEXT: or a1, a2, a1
-; RV64I-NEXT: srli a2, s2, 24
-; RV64I-NEXT: and a3, a3, s10
-; RV64I-NEXT: or a3, a3, a4
-; RV64I-NEXT: srli a4, s2, 8
-; RV64I-NEXT: slli a5, a5, 24
-; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: srli a6, s2, 40
-; RV64I-NEXT: slli a7, a7, 40
-; RV64I-NEXT: or a7, s3, a7
-; RV64I-NEXT: srli t0, s2, 56
-; RV64I-NEXT: and a2, a2, s1
-; RV64I-NEXT: and a4, a4, s11
-; RV64I-NEXT: or a2, a4, a2
-; RV64I-NEXT: and a4, s2, s1
-; RV64I-NEXT: and a6, a6, s10
-; RV64I-NEXT: or a6, a6, t0
-; RV64I-NEXT: srliw t0, s2, 24
-; RV64I-NEXT: slli a4, a4, 24
-; RV64I-NEXT: slli t0, t0, 32
-; RV64I-NEXT: or a4, a4, t0
-; RV64I-NEXT: and t0, s2, s10
-; RV64I-NEXT: slli s2, s2, 56
-; RV64I-NEXT: slli t0, t0, 40
-; RV64I-NEXT: or t0, s2, t0
-; RV64I-NEXT: or a1, a1, a3
-; RV64I-NEXT: srli a3, a0, 40
-; RV64I-NEXT: or a5, a7, a5
-; RV64I-NEXT: srli a7, a0, 56
-; RV64I-NEXT: or a2, a2, a6
-; RV64I-NEXT: srli a6, a0, 24
-; RV64I-NEXT: or a4, t0, a4
-; RV64I-NEXT: srli t0, a0, 8
-; RV64I-NEXT: and a3, a3, s10
-; RV64I-NEXT: or a3, a3, a7
-; RV64I-NEXT: srliw a7, a0, 24
-; RV64I-NEXT: and a6, a6, s1
-; RV64I-NEXT: and t0, t0, s11
-; RV64I-NEXT: or a6, t0, a6
-; RV64I-NEXT: and t0, a0, s1
-; RV64I-NEXT: slli a7, a7, 32
-; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a7, t0, a7
-; RV64I-NEXT: slli t0, a0, 56
-; RV64I-NEXT: and a0, a0, s10
-; RV64I-NEXT: slli a0, a0, 40
-; RV64I-NEXT: or a0, t0, a0
-; RV64I-NEXT: or a1, a5, a1
-; RV64I-NEXT: or a2, a4, a2
-; RV64I-NEXT: or a3, a6, a3
-; RV64I-NEXT: or a0, a0, a7
-; RV64I-NEXT: srli a4, a1, 4
-; RV64I-NEXT: and a1, a1, s9
-; RV64I-NEXT: srli a5, a2, 4
-; RV64I-NEXT: and a2, a2, s9
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: and a3, a4, s9
-; RV64I-NEXT: slli a1, a1, 4
-; RV64I-NEXT: and a4, a5, s9
-; RV64I-NEXT: slli a2, a2, 4
-; RV64I-NEXT: srli a5, a0, 4
-; RV64I-NEXT: and a0, a0, s9
-; RV64I-NEXT: or a1, a3, a1
-; RV64I-NEXT: or a2, a4, a2
-; RV64I-NEXT: and a3, a5, s9
-; RV64I-NEXT: slli a0, a0, 4
-; RV64I-NEXT: srli a4, a1, 2
-; RV64I-NEXT: and a1, a1, s8
-; RV64I-NEXT: srli a5, a2, 2
-; RV64I-NEXT: and a2, a2, s8
-; RV64I-NEXT: or a0, a3, a0
-; RV64I-NEXT: and a3, a4, s8
-; RV64I-NEXT: slli a1, a1, 2
-; RV64I-NEXT: and a4, a5, s8
-; RV64I-NEXT: slli a2, a2, 2
-; RV64I-NEXT: srli a5, a0, 2
-; RV64I-NEXT: and a0, a0, s8
-; RV64I-NEXT: or a1, a3, a1
-; RV64I-NEXT: or a2, a4, a2
-; RV64I-NEXT: and a3, a5, s8
-; RV64I-NEXT: slli a0, a0, 2
-; RV64I-NEXT: srli a4, a1, 1
-; RV64I-NEXT: and a1, a1, s7
-; RV64I-NEXT: srli a5, a2, 1
-; RV64I-NEXT: and a2, a2, s7
-; RV64I-NEXT: or a0, a3, a0
-; RV64I-NEXT: and a3, a4, s7
-; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: and a4, a5, s7
-; RV64I-NEXT: slli a2, a2, 1
-; RV64I-NEXT: srli a5, a0, 1
-; RV64I-NEXT: and a0, a0, s7
-; RV64I-NEXT: or s2, a3, a1
-; RV64I-NEXT: or s0, a4, a2
-; RV64I-NEXT: and a1, a5, s7
-; RV64I-NEXT: slli a0, a0, 1
-; RV64I-NEXT: or s4, a1, a0
-; RV64I-NEXT: andi a1, s0, 2
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: andi a1, s0, 1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s5, a0, s3
-; RV64I-NEXT: andi a1, s0, 4
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: andi a1, s0, 8
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: andi a1, s0, 16
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: andi a1, s0, 32
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s0, 64
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: andi a1, s0, 128
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: andi a1, s0, 256
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s0, 512
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: andi a1, s0, 1024
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 4
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: lui a1, 32
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 64
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 128
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 256
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 512
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s5, s5, a0
-; RV64I-NEXT: lui a1, 1024
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: lui a1, 2048
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 4096
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 8192
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 16384
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 32768
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: lui a1, 65536
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s5, a0
-; RV64I-NEXT: lui a1, 131072
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: lui a1, 262144
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 200(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor s6, s6, a0
-; RV64I-NEXT: ld a1, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor s3, s3, a0
-; RV64I-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: and a1, s0, a1
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: xor a0, s3, a0
-; RV64I-NEXT: xor a0, s6, a0
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: srli a2, a0, 56
-; RV64I-NEXT: srli a3, a0, 8
-; RV64I-NEXT: and a3, a3, s11
-; RV64I-NEXT: srli a4, a0, 24
-; RV64I-NEXT: and a1, a1, s10
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: srliw a2, a0, 24
-; RV64I-NEXT: and a4, a4, s1
-; RV64I-NEXT: and a5, a0, s1
-; RV64I-NEXT: and a6, a0, s10
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: slli a5, a5, 24
-; RV64I-NEXT: slli a6, a6, 40
-; RV64I-NEXT: or a3, a3, a4
-; RV64I-NEXT: or a2, a5, a2
-; RV64I-NEXT: or a0, a0, a6
-; RV64I-NEXT: or a1, a3, a1
-; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: and a0, a0, s9
-; RV64I-NEXT: and a1, a1, s9
-; RV64I-NEXT: slli a0, a0, 4
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 2
-; RV64I-NEXT: and a0, a0, s8
-; RV64I-NEXT: and a1, a1, s8
-; RV64I-NEXT: slli a0, a0, 2
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: and a0, a0, s7
-; RV64I-NEXT: and a1, a1, s7
; RV64I-NEXT: slli a0, a0, 1
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sd s4, 0(a1)
-; RV64I-NEXT: sd a0, 8(a1)
-; RV64I-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sd s4, 0(a1)
-; RV64I-NEXT: sd a0, 8(a1)
-; RV64I-NEXT: ld ra, 392(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 384(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 376(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 368(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 360(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 352(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 344(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 336(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 328(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 320(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 312(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s10, 304(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s11, 296(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 400
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: ld a2, 1000(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sd a1, 0(a2)
+; RV64I-NEXT: sd a0, 8(a2)
+; RV64I-NEXT: ld a2, 1008(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sd a1, 0(a2)
+; RV64I-NEXT: sd a0, 8(a2)
+; RV64I-NEXT: ld ra, 1112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 1104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 1096(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 1088(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 1080(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 1072(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 1064(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 1056(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 1048(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 1040(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 1032(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 1024(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 1016(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 1120
; RV64I-NEXT: ret
;
; RV32IM-LABEL: commutative_clmulr_v2i64:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -800
-; RV32IM-NEXT: sw ra, 796(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s0, 792(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s1, 788(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s2, 784(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s3, 780(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s4, 776(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s5, 772(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s6, 768(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s7, 764(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s8, 760(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s9, 756(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s10, 752(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s11, 748(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw a3, 680(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw a2, 676(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a5, 0(a0)
-; RV32IM-NEXT: lw s0, 4(a0)
-; RV32IM-NEXT: lw t0, 8(a0)
-; RV32IM-NEXT: lw a2, 12(a0)
-; RV32IM-NEXT: lw t5, 0(a1)
-; RV32IM-NEXT: lw s3, 4(a1)
-; RV32IM-NEXT: lw t2, 8(a1)
-; RV32IM-NEXT: lw a1, 12(a1)
-; RV32IM-NEXT: lui a0, 16
-; RV32IM-NEXT: lui t1, 61681
-; RV32IM-NEXT: lui s1, 209715
-; RV32IM-NEXT: lui s11, 349525
-; RV32IM-NEXT: li ra, 1
-; RV32IM-NEXT: addi s10, a0, -256
-; RV32IM-NEXT: srli a0, s3, 8
-; RV32IM-NEXT: srli s2, s3, 24
-; RV32IM-NEXT: and a3, s3, s10
-; RV32IM-NEXT: slli s3, s3, 24
-; RV32IM-NEXT: srli a4, s0, 8
-; RV32IM-NEXT: srli a6, s0, 24
-; RV32IM-NEXT: and t6, s0, s10
-; RV32IM-NEXT: slli s0, s0, 24
-; RV32IM-NEXT: srli a7, t5, 8
-; RV32IM-NEXT: srli t4, t5, 24
-; RV32IM-NEXT: and t3, t5, s10
-; RV32IM-NEXT: slli t5, t5, 24
-; RV32IM-NEXT: and a0, a0, s10
-; RV32IM-NEXT: or a0, a0, s2
-; RV32IM-NEXT: srli s2, a5, 8
-; RV32IM-NEXT: slli a3, a3, 8
-; RV32IM-NEXT: or a3, s3, a3
-; RV32IM-NEXT: srli s4, a5, 24
-; RV32IM-NEXT: and a4, a4, s10
-; RV32IM-NEXT: or a4, a4, a6
-; RV32IM-NEXT: and a6, a5, s10
-; RV32IM-NEXT: slli s3, a5, 24
-; RV32IM-NEXT: slli a5, t6, 8
-; RV32IM-NEXT: or a5, s0, a5
-; RV32IM-NEXT: srli s6, a1, 8
-; RV32IM-NEXT: and a7, a7, s10
-; RV32IM-NEXT: or a7, a7, t4
-; RV32IM-NEXT: srli s8, a1, 24
+; RV32IM-NEXT: addi sp, sp, -704
+; RV32IM-NEXT: sw ra, 700(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s0, 696(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 692(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s2, 688(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s3, 684(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s4, 680(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s5, 676(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s6, 672(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s7, 668(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s8, 664(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s9, 660(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s10, 656(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s11, 652(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a3, 640(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a2, 636(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a3, 0(a0)
+; RV32IM-NEXT: lw s2, 4(a0)
+; RV32IM-NEXT: lw t2, 8(a0)
+; RV32IM-NEXT: lw t1, 12(a0)
+; RV32IM-NEXT: lw t4, 0(a1)
+; RV32IM-NEXT: lw s4, 4(a1)
+; RV32IM-NEXT: lw a5, 8(a1)
+; RV32IM-NEXT: lw a0, 12(a1)
+; RV32IM-NEXT: lui a4, 16
+; RV32IM-NEXT: lui a1, 61681
+; RV32IM-NEXT: lui a2, 209715
+; RV32IM-NEXT: lui s6, 349525
+; RV32IM-NEXT: li a7, 1
+; RV32IM-NEXT: addi s11, a4, -256
+; RV32IM-NEXT: addi s10, a1, -241
+; RV32IM-NEXT: addi s8, a2, 819
+; RV32IM-NEXT: srli s0, s4, 8
+; RV32IM-NEXT: srli s5, s4, 24
+; RV32IM-NEXT: and a1, s4, s11
+; RV32IM-NEXT: slli s4, s4, 24
+; RV32IM-NEXT: srli a2, s2, 8
+; RV32IM-NEXT: srli s3, s2, 24
+; RV32IM-NEXT: and t3, s2, s11
+; RV32IM-NEXT: slli s2, s2, 24
+; RV32IM-NEXT: srli a6, t4, 8
+; RV32IM-NEXT: srli t0, t4, 24
+; RV32IM-NEXT: and a4, t4, s11
+; RV32IM-NEXT: slli t4, t4, 24
+; RV32IM-NEXT: and t5, s0, s11
+; RV32IM-NEXT: or s0, t5, s5
+; RV32IM-NEXT: srli s5, a3, 8
+; RV32IM-NEXT: slli a1, a1, 8
+; RV32IM-NEXT: or a1, s4, a1
+; RV32IM-NEXT: srli t5, a3, 24
+; RV32IM-NEXT: and a2, a2, s11
+; RV32IM-NEXT: or a2, a2, s3
+; RV32IM-NEXT: and s3, a3, s11
+; RV32IM-NEXT: slli s4, a3, 24
; RV32IM-NEXT: slli t3, t3, 8
-; RV32IM-NEXT: or t3, t5, t3
-; RV32IM-NEXT: and s7, a1, s10
-; RV32IM-NEXT: slli s9, a1, 24
-; RV32IM-NEXT: and a1, s2, s10
-; RV32IM-NEXT: or a1, a1, s4
-; RV32IM-NEXT: srli t6, a2, 8
-; RV32IM-NEXT: slli a6, a6, 8
-; RV32IM-NEXT: or t4, s3, a6
-; RV32IM-NEXT: srli s0, a2, 24
-; RV32IM-NEXT: and a6, s6, s10
-; RV32IM-NEXT: or t5, a6, s8
-; RV32IM-NEXT: and a6, a2, s10
-; RV32IM-NEXT: slli s3, a2, 24
-; RV32IM-NEXT: slli s7, s7, 8
-; RV32IM-NEXT: or a2, s9, s7
-; RV32IM-NEXT: srli s4, t2, 8
-; RV32IM-NEXT: and t6, t6, s10
-; RV32IM-NEXT: or t6, t6, s0
-; RV32IM-NEXT: srli s6, t2, 24
-; RV32IM-NEXT: slli a6, a6, 8
-; RV32IM-NEXT: or s0, s3, a6
-; RV32IM-NEXT: and s2, t2, s10
+; RV32IM-NEXT: or a3, s2, t3
+; RV32IM-NEXT: srli t6, a0, 8
+; RV32IM-NEXT: and a6, a6, s11
+; RV32IM-NEXT: or a6, a6, t0
+; RV32IM-NEXT: srli s1, a0, 24
+; RV32IM-NEXT: slli a4, a4, 8
+; RV32IM-NEXT: or a4, t4, a4
+; RV32IM-NEXT: and t4, a0, s11
+; RV32IM-NEXT: slli s7, a0, 24
+; RV32IM-NEXT: and a0, s5, s11
+; RV32IM-NEXT: or a0, a0, t5
+; RV32IM-NEXT: srli t3, t1, 8
+; RV32IM-NEXT: slli s3, s3, 8
+; RV32IM-NEXT: or t0, s4, s3
+; RV32IM-NEXT: srli s3, t1, 24
+; RV32IM-NEXT: and t5, t6, s11
+; RV32IM-NEXT: or s2, t5, s1
+; RV32IM-NEXT: and s4, t1, s11
+; RV32IM-NEXT: slli t1, t1, 24
+; RV32IM-NEXT: slli t4, t4, 8
+; RV32IM-NEXT: or t4, s7, t4
+; RV32IM-NEXT: srli t5, a5, 8
+; RV32IM-NEXT: and t3, t3, s11
+; RV32IM-NEXT: or t3, t3, s3
+; RV32IM-NEXT: srli t6, a5, 24
+; RV32IM-NEXT: slli s4, s4, 8
+; RV32IM-NEXT: or t1, t1, s4
+; RV32IM-NEXT: and s3, a5, s11
+; RV32IM-NEXT: slli s4, a5, 24
+; RV32IM-NEXT: and a5, t5, s11
+; RV32IM-NEXT: or a5, a5, t6
+; RV32IM-NEXT: srli t5, t2, 8
+; RV32IM-NEXT: slli s3, s3, 8
+; RV32IM-NEXT: or t6, s4, s3
+; RV32IM-NEXT: srli s1, t2, 24
+; RV32IM-NEXT: and t5, t5, s11
+; RV32IM-NEXT: or t5, t5, s1
+; RV32IM-NEXT: and s1, t2, s11
; RV32IM-NEXT: slli t2, t2, 24
-; RV32IM-NEXT: and a6, s4, s10
-; RV32IM-NEXT: or a6, a6, s6
-; RV32IM-NEXT: srli s3, t0, 8
-; RV32IM-NEXT: slli s2, s2, 8
-; RV32IM-NEXT: or s2, t2, s2
-; RV32IM-NEXT: srli t2, t0, 24
-; RV32IM-NEXT: and s3, s3, s10
-; RV32IM-NEXT: or s3, s3, t2
-; RV32IM-NEXT: and t2, t0, s10
-; RV32IM-NEXT: sw s10, 744(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli t0, t0, 24
-; RV32IM-NEXT: slli t2, t2, 8
-; RV32IM-NEXT: or s4, t0, t2
-; RV32IM-NEXT: or a0, a3, a0
-; RV32IM-NEXT: or a4, a5, a4
-; RV32IM-NEXT: or a3, t3, a7
-; RV32IM-NEXT: or a1, t4, a1
-; RV32IM-NEXT: or a2, a2, t5
-; RV32IM-NEXT: addi t2, t1, -241
-; RV32IM-NEXT: sw t2, 732(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: addi t1, s1, 819
-; RV32IM-NEXT: or a5, s0, t6
-; RV32IM-NEXT: addi t0, s11, 1365
-; RV32IM-NEXT: or a6, s2, a6
-; RV32IM-NEXT: or a7, s4, s3
-; RV32IM-NEXT: srli t3, a0, 4
-; RV32IM-NEXT: and a0, a0, t2
-; RV32IM-NEXT: srli t4, a4, 4
-; RV32IM-NEXT: and a4, a4, t2
-; RV32IM-NEXT: and t3, t3, t2
-; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: or a0, t3, a0
+; RV32IM-NEXT: slli s1, s1, 8
+; RV32IM-NEXT: or s1, t2, s1
+; RV32IM-NEXT: addi t2, s6, 1365
+; RV32IM-NEXT: slli ra, a7, 11
+; RV32IM-NEXT: or a1, a1, s0
+; RV32IM-NEXT: or a2, a3, a2
+; RV32IM-NEXT: or a3, a4, a6
+; RV32IM-NEXT: or a0, t0, a0
+; RV32IM-NEXT: or a4, t4, s2
+; RV32IM-NEXT: or a6, t1, t3
+; RV32IM-NEXT: or a5, t6, a5
+; RV32IM-NEXT: or a7, s1, t5
+; RV32IM-NEXT: srli t0, a1, 4
+; RV32IM-NEXT: sw s10, 648(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a1, a1, s10
+; RV32IM-NEXT: srli t1, a2, 4
+; RV32IM-NEXT: and a2, a2, s10
; RV32IM-NEXT: srli t3, a3, 4
-; RV32IM-NEXT: and a3, a3, t2
-; RV32IM-NEXT: and t4, t4, t2
-; RV32IM-NEXT: slli a4, a4, 4
-; RV32IM-NEXT: or a4, t4, a4
-; RV32IM-NEXT: srli t4, a1, 4
-; RV32IM-NEXT: and a1, a1, t2
-; RV32IM-NEXT: and t3, t3, t2
-; RV32IM-NEXT: slli a3, a3, 4
-; RV32IM-NEXT: or a3, t3, a3
-; RV32IM-NEXT: srli t3, a2, 4
-; RV32IM-NEXT: and a2, a2, t2
-; RV32IM-NEXT: and t4, t4, t2
+; RV32IM-NEXT: and a3, a3, s10
+; RV32IM-NEXT: srli t4, a0, 4
+; RV32IM-NEXT: and a0, a0, s10
+; RV32IM-NEXT: srli t5, a4, 4
+; RV32IM-NEXT: and a4, a4, s10
+; RV32IM-NEXT: srli t6, a6, 4
+; RV32IM-NEXT: and a6, a6, s10
+; RV32IM-NEXT: srli s0, a5, 4
+; RV32IM-NEXT: and a5, a5, s10
+; RV32IM-NEXT: srli s1, a7, 4
+; RV32IM-NEXT: and a7, a7, s10
+; RV32IM-NEXT: and t0, t0, s10
; RV32IM-NEXT: slli a1, a1, 4
-; RV32IM-NEXT: or a1, t4, a1
-; RV32IM-NEXT: srli t4, a5, 4
-; RV32IM-NEXT: and a5, a5, t2
-; RV32IM-NEXT: and t3, t3, t2
+; RV32IM-NEXT: and t1, t1, s10
; RV32IM-NEXT: slli a2, a2, 4
-; RV32IM-NEXT: or a2, t3, a2
-; RV32IM-NEXT: srli t3, a6, 4
-; RV32IM-NEXT: and a6, a6, t2
-; RV32IM-NEXT: and t4, t4, t2
-; RV32IM-NEXT: slli a5, a5, 4
-; RV32IM-NEXT: or a5, t4, a5
-; RV32IM-NEXT: srli t4, a7, 4
-; RV32IM-NEXT: and a7, a7, t2
-; RV32IM-NEXT: and t3, t3, t2
+; RV32IM-NEXT: and t3, t3, s10
+; RV32IM-NEXT: slli a3, a3, 4
+; RV32IM-NEXT: and t4, t4, s10
+; RV32IM-NEXT: slli a0, a0, 4
+; RV32IM-NEXT: and t5, t5, s10
+; RV32IM-NEXT: slli a4, a4, 4
+; RV32IM-NEXT: and t6, t6, s10
; RV32IM-NEXT: slli a6, a6, 4
-; RV32IM-NEXT: and t4, t4, t2
+; RV32IM-NEXT: and s0, s0, s10
+; RV32IM-NEXT: slli a5, a5, 4
+; RV32IM-NEXT: and s1, s1, s10
; RV32IM-NEXT: slli a7, a7, 4
-; RV32IM-NEXT: or a6, t3, a6
-; RV32IM-NEXT: or a7, t4, a7
-; RV32IM-NEXT: srli t3, a0, 2
-; RV32IM-NEXT: sw t1, 736(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, a0, t1
-; RV32IM-NEXT: srli t4, a4, 2
-; RV32IM-NEXT: and a4, a4, t1
-; RV32IM-NEXT: and t3, t3, t1
-; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: or a0, t3, a0
-; RV32IM-NEXT: srli t3, a3, 2
-; RV32IM-NEXT: and a3, a3, t1
-; RV32IM-NEXT: and t4, t4, t1
-; RV32IM-NEXT: slli a4, a4, 2
-; RV32IM-NEXT: or a4, t4, a4
-; RV32IM-NEXT: srli t4, a1, 2
-; RV32IM-NEXT: and a1, a1, t1
-; RV32IM-NEXT: and t3, t3, t1
-; RV32IM-NEXT: slli a3, a3, 2
+; RV32IM-NEXT: or a1, t0, a1
+; RV32IM-NEXT: or a2, t1, a2
; RV32IM-NEXT: or a3, t3, a3
-; RV32IM-NEXT: srli t3, a2, 2
-; RV32IM-NEXT: and a2, a2, t1
-; RV32IM-NEXT: and t4, t4, t1
+; RV32IM-NEXT: or a0, t4, a0
+; RV32IM-NEXT: or a4, t5, a4
+; RV32IM-NEXT: or a6, t6, a6
+; RV32IM-NEXT: or a5, s0, a5
+; RV32IM-NEXT: or a7, s1, a7
+; RV32IM-NEXT: srli t0, a1, 2
+; RV32IM-NEXT: sw s8, 632(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a1, a1, s8
+; RV32IM-NEXT: srli t1, a2, 2
+; RV32IM-NEXT: and a2, a2, s8
+; RV32IM-NEXT: srli t3, a3, 2
+; RV32IM-NEXT: and a3, a3, s8
+; RV32IM-NEXT: srli t4, a0, 2
+; RV32IM-NEXT: and a0, a0, s8
+; RV32IM-NEXT: srli t5, a4, 2
+; RV32IM-NEXT: and a4, a4, s8
+; RV32IM-NEXT: srli t6, a6, 2
+; RV32IM-NEXT: and a6, a6, s8
+; RV32IM-NEXT: srli s0, a5, 2
+; RV32IM-NEXT: and a5, a5, s8
+; RV32IM-NEXT: srli s1, a7, 2
+; RV32IM-NEXT: and a7, a7, s8
+; RV32IM-NEXT: and t0, t0, s8
; RV32IM-NEXT: slli a1, a1, 2
-; RV32IM-NEXT: or a1, t4, a1
-; RV32IM-NEXT: srli t4, a5, 2
-; RV32IM-NEXT: and a5, a5, t1
-; RV32IM-NEXT: and t3, t3, t1
+; RV32IM-NEXT: and t1, t1, s8
; RV32IM-NEXT: slli a2, a2, 2
-; RV32IM-NEXT: or t3, t3, a2
-; RV32IM-NEXT: srli a2, a6, 2
-; RV32IM-NEXT: and a6, a6, t1
-; RV32IM-NEXT: and t4, t4, t1
-; RV32IM-NEXT: slli a5, a5, 2
-; RV32IM-NEXT: or a5, t4, a5
-; RV32IM-NEXT: srli t4, a7, 2
-; RV32IM-NEXT: and a7, a7, t1
-; RV32IM-NEXT: and a2, a2, t1
+; RV32IM-NEXT: and t3, t3, s8
+; RV32IM-NEXT: slli a3, a3, 2
+; RV32IM-NEXT: and t4, t4, s8
+; RV32IM-NEXT: slli a0, a0, 2
+; RV32IM-NEXT: and t5, t5, s8
+; RV32IM-NEXT: slli a4, a4, 2
+; RV32IM-NEXT: and t6, t6, s8
; RV32IM-NEXT: slli a6, a6, 2
-; RV32IM-NEXT: and t4, t4, t1
+; RV32IM-NEXT: and s0, s0, s8
+; RV32IM-NEXT: slli a5, a5, 2
+; RV32IM-NEXT: and s1, s1, s8
; RV32IM-NEXT: slli a7, a7, 2
-; RV32IM-NEXT: or a6, a2, a6
-; RV32IM-NEXT: or t4, t4, a7
-; RV32IM-NEXT: srli a2, a0, 1
-; RV32IM-NEXT: sw t0, 740(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, a0, t0
-; RV32IM-NEXT: srli a7, a4, 1
-; RV32IM-NEXT: and a4, a4, t0
-; RV32IM-NEXT: and a2, a2, t0
-; RV32IM-NEXT: slli a0, a0, 1
-; RV32IM-NEXT: or a2, a2, a0
-; RV32IM-NEXT: srli a0, a3, 1
-; RV32IM-NEXT: and a3, a3, t0
-; RV32IM-NEXT: and a7, a7, t0
+; RV32IM-NEXT: or a1, t0, a1
+; RV32IM-NEXT: or a2, t1, a2
+; RV32IM-NEXT: or a3, t3, a3
+; RV32IM-NEXT: or a0, t4, a0
+; RV32IM-NEXT: or a4, t5, a4
+; RV32IM-NEXT: or a6, t6, a6
+; RV32IM-NEXT: or a5, s0, a5
+; RV32IM-NEXT: or a7, s1, a7
+; RV32IM-NEXT: srli t0, a1, 1
+; RV32IM-NEXT: sw t2, 628(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a1, a1, t2
+; RV32IM-NEXT: srli t1, a2, 1
+; RV32IM-NEXT: and a2, a2, t2
+; RV32IM-NEXT: srli t3, a3, 1
+; RV32IM-NEXT: and a3, a3, t2
+; RV32IM-NEXT: srli t4, a0, 1
+; RV32IM-NEXT: and a0, a0, t2
+; RV32IM-NEXT: srli t5, a4, 1
+; RV32IM-NEXT: and a4, a4, t2
+; RV32IM-NEXT: srli t6, a6, 1
+; RV32IM-NEXT: and a6, a6, t2
+; RV32IM-NEXT: srli s0, a5, 1
+; RV32IM-NEXT: and a5, a5, t2
+; RV32IM-NEXT: srli s1, a7, 1
+; RV32IM-NEXT: and a7, a7, t2
+; RV32IM-NEXT: and t0, t0, t2
+; RV32IM-NEXT: slli a1, a1, 1
+; RV32IM-NEXT: and t1, t1, t2
+; RV32IM-NEXT: slli a2, a2, 1
+; RV32IM-NEXT: and t3, t3, t2
+; RV32IM-NEXT: slli a3, a3, 1
+; RV32IM-NEXT: and t4, t4, t2
+; RV32IM-NEXT: slli s2, a0, 1
+; RV32IM-NEXT: and t5, t5, t2
; RV32IM-NEXT: slli a4, a4, 1
-; RV32IM-NEXT: or a4, a7, a4
-; RV32IM-NEXT: srli a7, a1, 1
-; RV32IM-NEXT: and t5, a1, t0
-; RV32IM-NEXT: and a0, a0, t0
-; RV32IM-NEXT: slli a1, a3, 1
-; RV32IM-NEXT: or a1, a0, a1
-; RV32IM-NEXT: srli a3, t3, 1
-; RV32IM-NEXT: and t3, t3, t0
-; RV32IM-NEXT: and a0, a7, t0
-; RV32IM-NEXT: slli t5, t5, 1
-; RV32IM-NEXT: or a0, a0, t5
-; RV32IM-NEXT: srli a7, a5, 1
-; RV32IM-NEXT: and a5, a5, t0
-; RV32IM-NEXT: and a3, a3, t0
-; RV32IM-NEXT: slli t3, t3, 1
-; RV32IM-NEXT: or a3, a3, t3
-; RV32IM-NEXT: srli t3, a6, 1
-; RV32IM-NEXT: and a6, a6, t0
-; RV32IM-NEXT: and a7, a7, t0
-; RV32IM-NEXT: slli a5, a5, 1
-; RV32IM-NEXT: or a7, a7, a5
-; RV32IM-NEXT: srli a5, t4, 1
-; RV32IM-NEXT: and t4, t4, t0
-; RV32IM-NEXT: and t3, t3, t0
+; RV32IM-NEXT: and t6, t6, t2
; RV32IM-NEXT: slli a6, a6, 1
-; RV32IM-NEXT: and a5, a5, t0
-; RV32IM-NEXT: slli t4, t4, 1
-; RV32IM-NEXT: or a6, t3, a6
-; RV32IM-NEXT: or a5, a5, t4
-; RV32IM-NEXT: srli t3, a2, 8
-; RV32IM-NEXT: srli t4, a2, 24
-; RV32IM-NEXT: and t3, t3, s10
-; RV32IM-NEXT: or t0, t3, t4
-; RV32IM-NEXT: sw t0, 656(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s0, s0, t2
+; RV32IM-NEXT: slli a5, a5, 1
+; RV32IM-NEXT: and s1, s1, t2
+; RV32IM-NEXT: slli a7, a7, 1
+; RV32IM-NEXT: or a0, t0, a1
+; RV32IM-NEXT: or a1, t1, a2
+; RV32IM-NEXT: or a2, t3, a3
+; RV32IM-NEXT: or a3, t4, s2
+; RV32IM-NEXT: or s2, t5, a4
+; RV32IM-NEXT: or s7, t6, a6
+; RV32IM-NEXT: or s4, s0, a5
+; RV32IM-NEXT: or s3, s1, a7
+; RV32IM-NEXT: srli a4, a0, 8
+; RV32IM-NEXT: srli a5, a0, 24
+; RV32IM-NEXT: slli a6, a0, 24
+; RV32IM-NEXT: and a7, a0, s11
+; RV32IM-NEXT: srli t0, a1, 8
+; RV32IM-NEXT: srli t1, a1, 24
+; RV32IM-NEXT: slli t3, a1, 24
+; RV32IM-NEXT: and t4, a1, s11
+; RV32IM-NEXT: and a4, a4, s11
+; RV32IM-NEXT: or s9, a4, a5
+; RV32IM-NEXT: and a4, a1, ra
+; RV32IM-NEXT: slli a7, a7, 8
+; RV32IM-NEXT: or a5, a6, a7
+; RV32IM-NEXT: sw a5, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t2, 1
+; RV32IM-NEXT: and a6, a1, t2
+; RV32IM-NEXT: and a5, t0, s11
+; RV32IM-NEXT: slli t4, t4, 8
+; RV32IM-NEXT: or a5, a5, t1
+; RV32IM-NEXT: sw a5, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, a2, a4
+; RV32IM-NEXT: or a7, t3, t4
+; RV32IM-NEXT: sw a7, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a7, a2, a6
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: sw a5, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t1, 64
+; RV32IM-NEXT: and t0, a1, t1
+; RV32IM-NEXT: lui t3, 128
+; RV32IM-NEXT: and t6, a1, t3
+; RV32IM-NEXT: mul a5, a2, t0
+; RV32IM-NEXT: mul a7, a2, t6
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: sw a5, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 16384
+; RV32IM-NEXT: lui a7, 32768
+; RV32IM-NEXT: and a5, a1, a5
+; RV32IM-NEXT: sw a5, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t4, 16384
+; RV32IM-NEXT: and a7, a1, a7
+; RV32IM-NEXT: sw a7, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t5, 32768
+; RV32IM-NEXT: mul a5, a2, a5
+; RV32IM-NEXT: mul a7, a2, a7
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: sw a5, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a3, ra
+; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: and a7, a3, t2
+; RV32IM-NEXT: lui s6, 1
+; RV32IM-NEXT: mul a7, a0, a7
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: sw a5, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a3, t1
+; RV32IM-NEXT: lui s1, 64
+; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: and a7, a3, t3
+; RV32IM-NEXT: lui s8, 128
+; RV32IM-NEXT: mul a7, a0, a7
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: sw a5, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a3, t4
+; RV32IM-NEXT: lui t2, 16384
+; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: and a7, a3, t5
+; RV32IM-NEXT: lui t4, 32768
+; RV32IM-NEXT: mul a7, a0, a7
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: sw a5, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli a5, s2, 8
+; RV32IM-NEXT: sw s11, 644(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, a5, s11
+; RV32IM-NEXT: srli a7, s2, 24
+; RV32IM-NEXT: or a5, a5, a7
+; RV32IM-NEXT: sw a5, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, s2, s11
+; RV32IM-NEXT: slli a5, a5, 8
+; RV32IM-NEXT: slli a7, s2, 24
+; RV32IM-NEXT: or a5, a7, a5
+; RV32IM-NEXT: sw a5, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli a5, s7, 8
+; RV32IM-NEXT: and a5, a5, s11
+; RV32IM-NEXT: srli a7, s7, 24
+; RV32IM-NEXT: or a5, a5, a7
+; RV32IM-NEXT: sw a5, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, s7, s11
+; RV32IM-NEXT: slli a5, a5, 8
+; RV32IM-NEXT: slli a7, s7, 24
+; RV32IM-NEXT: or a5, a7, a5
+; RV32IM-NEXT: sw a5, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s10, s7, ra
+; RV32IM-NEXT: sw ra, 624(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and s11, s7, s6
+; RV32IM-NEXT: mul a5, s4, s10
+; RV32IM-NEXT: mul a7, s4, s11
+; RV32IM-NEXT: xor s5, a5, a7
+; RV32IM-NEXT: and a5, s7, s1
+; RV32IM-NEXT: and a7, s7, s8
+; RV32IM-NEXT: mul t1, s4, a5
+; RV32IM-NEXT: mul t3, s4, a7
+; RV32IM-NEXT: xor s0, t1, t3
+; RV32IM-NEXT: and t1, s7, t2
+; RV32IM-NEXT: sw t1, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t4, s7, t4
+; RV32IM-NEXT: sw t4, 620(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t1, s4, t1
+; RV32IM-NEXT: mul t3, s4, t4
+; RV32IM-NEXT: xor t1, t1, t3
+; RV32IM-NEXT: sw t1, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t1, s3, ra
+; RV32IM-NEXT: mul t1, s2, t1
+; RV32IM-NEXT: and t3, s3, s6
+; RV32IM-NEXT: mul t3, s2, t3
+; RV32IM-NEXT: xor t3, t1, t3
+; RV32IM-NEXT: and t1, s3, s1
+; RV32IM-NEXT: mul t1, s2, t1
+; RV32IM-NEXT: and t4, s3, s8
+; RV32IM-NEXT: mul t4, s2, t4
+; RV32IM-NEXT: xor t1, t1, t4
+; RV32IM-NEXT: and t4, s3, t2
+; RV32IM-NEXT: mul t4, s2, t4
+; RV32IM-NEXT: and t5, s3, t5
+; RV32IM-NEXT: mul t5, s2, t5
+; RV32IM-NEXT: xor t4, t4, t5
+; RV32IM-NEXT: mul a4, a0, a4
+; RV32IM-NEXT: mul a6, a0, a6
+; RV32IM-NEXT: xor a4, a4, a6
+; RV32IM-NEXT: sw a4, 452(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, a0, t0
+; RV32IM-NEXT: mul a6, a0, t6
+; RV32IM-NEXT: xor a4, a4, a6
+; RV32IM-NEXT: sw a4, 404(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a4, 4096
+; RV32IM-NEXT: lui a6, 8192
+; RV32IM-NEXT: and t0, a1, a4
+; RV32IM-NEXT: lui t2, 4096
+; RV32IM-NEXT: and t5, a1, a6
+; RV32IM-NEXT: sw t5, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, a0, t0
+; RV32IM-NEXT: mul t5, a0, t5
+; RV32IM-NEXT: xor a4, a4, t5
+; RV32IM-NEXT: sw a4, 356(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, s2, s10
+; RV32IM-NEXT: mul t5, s2, s11
+; RV32IM-NEXT: xor a4, a4, t5
+; RV32IM-NEXT: sw a4, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, s2, a5
+; RV32IM-NEXT: mul a5, s2, a7
+; RV32IM-NEXT: xor a4, a4, a5
+; RV32IM-NEXT: sw a4, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a4, s7, t2
+; RV32IM-NEXT: sw a4, 428(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a5, s7, a6
+; RV32IM-NEXT: sw a5, 424(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, s2, a4
+; RV32IM-NEXT: mul a5, s2, a5
+; RV32IM-NEXT: xor a4, a4, a5
+; RV32IM-NEXT: sw a4, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a4, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a4, a4, s9
+; RV32IM-NEXT: sw a4, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a4, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a4, a5, a4
+; RV32IM-NEXT: sw a4, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 2
+; RV32IM-NEXT: and a5, a1, a5
+; RV32IM-NEXT: sw a5, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t6, 2
+; RV32IM-NEXT: mul a4, a2, a5
+; RV32IM-NEXT: lw a5, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a5, a4
+; RV32IM-NEXT: sw a4, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a4, 256
+; RV32IM-NEXT: and a4, a1, a4
+; RV32IM-NEXT: sw a4, 392(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t2, 256
+; RV32IM-NEXT: mul a4, a2, a4
+; RV32IM-NEXT: lw a5, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a5, a4
+; RV32IM-NEXT: sw a4, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a4, 65536
+; RV32IM-NEXT: and a5, a1, a4
+; RV32IM-NEXT: sw a5, 376(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t5, a2, a5
+; RV32IM-NEXT: lw a5, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, t5
+; RV32IM-NEXT: sw a5, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t5, a3, t6
+; RV32IM-NEXT: mul t5, a0, t5
+; RV32IM-NEXT: lw a5, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, t5
+; RV32IM-NEXT: sw a5, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t5, a3, t2
+; RV32IM-NEXT: mul t5, a0, t5
+; RV32IM-NEXT: lw a5, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, t5
+; RV32IM-NEXT: sw a5, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t5, a3, a4
+; RV32IM-NEXT: mul t5, a0, t5
+; RV32IM-NEXT: lw a4, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, t5
+; RV32IM-NEXT: sw a4, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a4, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a4, a5, a4
+; RV32IM-NEXT: sw a4, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a4, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or a4, a5, a4
+; RV32IM-NEXT: sw a4, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t2, 2
+; RV32IM-NEXT: and a4, s7, t2
+; RV32IM-NEXT: sw a4, 420(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t6, s4, a4
+; RV32IM-NEXT: xor a4, s5, t6
+; RV32IM-NEXT: sw a4, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a4, 256
+; RV32IM-NEXT: and a5, s7, a4
+; RV32IM-NEXT: sw a5, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t6, s4, a5
+; RV32IM-NEXT: xor a5, s0, t6
+; RV32IM-NEXT: sw a5, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t5, 65536
+; RV32IM-NEXT: and a5, s7, t5
+; RV32IM-NEXT: sw a5, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t6, s4, a5
+; RV32IM-NEXT: lw a5, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, t6
+; RV32IM-NEXT: sw a5, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, s3, t2
+; RV32IM-NEXT: mul t6, s2, t6
+; RV32IM-NEXT: xor a5, t3, t6
+; RV32IM-NEXT: sw a5, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t3, s3, a4
+; RV32IM-NEXT: mul t3, s2, t3
+; RV32IM-NEXT: xor a4, t1, t3
+; RV32IM-NEXT: sw a4, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t1, s3, t5
+; RV32IM-NEXT: mul t1, s2, t1
+; RV32IM-NEXT: xor a4, t4, t1
+; RV32IM-NEXT: sw a4, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a1, 2
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: andi t3, a3, 2
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: slli t4, a2, 1
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t4, a0, 1
+; RV32IM-NEXT: and a4, t3, t4
+; RV32IM-NEXT: sw a4, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a1, 4
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: andi t3, a3, 4
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: slli t4, a2, 2
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t4, a0, 2
+; RV32IM-NEXT: and a4, t3, t4
+; RV32IM-NEXT: sw a4, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a1, 8
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: andi t3, a3, 8
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: slli t4, a2, 3
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t4, a0, 3
+; RV32IM-NEXT: and a4, t3, t4
+; RV32IM-NEXT: sw a4, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 496(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a1, 16
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: andi t3, a3, 16
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: slli t4, a2, 4
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 400(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t4, a0, 4
+; RV32IM-NEXT: and a4, t3, t4
+; RV32IM-NEXT: sw a4, 416(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a1, 32
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: andi t3, a3, 32
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: slli t4, a2, 5
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 372(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t4, a0, 5
+; RV32IM-NEXT: and a4, t3, t4
+; RV32IM-NEXT: sw a4, 388(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a1, 64
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: andi t3, a3, 64
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: slli t4, a2, 6
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t4, a0, 6
+; RV32IM-NEXT: and a4, t3, t4
+; RV32IM-NEXT: sw a4, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a1, 128
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: andi t3, a3, 128
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: slli t4, a2, 7
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 340(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t4, a0, 7
+; RV32IM-NEXT: and a4, t3, t4
+; RV32IM-NEXT: sw a4, 348(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 384(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a1, 256
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: andi t3, a3, 256
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: slli t4, a2, 8
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 320(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t4, a0, 8
+; RV32IM-NEXT: and a4, t3, t4
+; RV32IM-NEXT: sw a4, 332(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 360(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t1, a1, 512
+; RV32IM-NEXT: seqz t1, t1
+; RV32IM-NEXT: andi t3, a3, 512
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: addi t1, t1, -1
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: slli t4, a2, 9
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 368(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli t4, a0, 9
+; RV32IM-NEXT: and a4, t3, t4
+; RV32IM-NEXT: sw a4, 380(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a4, t1, t4
+; RV32IM-NEXT: sw a4, 432(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a6, 4
+; RV32IM-NEXT: lui a7, 8
+; RV32IM-NEXT: lui t1, 32
+; RV32IM-NEXT: lui s6, 512
; RV32IM-NEXT: lui s8, 1024
-; RV32IM-NEXT: slli t0, ra, 11
-; RV32IM-NEXT: lui s6, 2048
-; RV32IM-NEXT: lui s4, 4096
-; RV32IM-NEXT: lui s5, 8192
-; RV32IM-NEXT: lui s9, 16384
-; RV32IM-NEXT: lui s11, 32768
-; RV32IM-NEXT: lui ra, 65536
-; RV32IM-NEXT: lui t6, 131072
-; RV32IM-NEXT: andi t1, a0, 2
-; RV32IM-NEXT: sw t1, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a0, 1
-; RV32IM-NEXT: sw t1, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a0, 4
-; RV32IM-NEXT: sw t1, 640(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a0, 8
-; RV32IM-NEXT: sw t1, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a0, 16
-; RV32IM-NEXT: sw t1, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a0, 32
-; RV32IM-NEXT: sw t1, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a0, 64
-; RV32IM-NEXT: sw t1, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a0, 128
-; RV32IM-NEXT: sw t1, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a0, 256
-; RV32IM-NEXT: sw t1, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a0, 512
-; RV32IM-NEXT: sw t1, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a0, 1024
-; RV32IM-NEXT: sw t1, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and t1, a0, t0
-; RV32IM-NEXT: sw t1, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw t0, 728(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s0, 1
-; RV32IM-NEXT: and t1, a0, s0
-; RV32IM-NEXT: sw t1, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s1, 2
-; RV32IM-NEXT: and t1, a0, s1
-; RV32IM-NEXT: sw t1, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t1, 4
-; RV32IM-NEXT: and t2, a0, t1
-; RV32IM-NEXT: sw t2, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t5, 8
-; RV32IM-NEXT: and t2, a0, t5
-; RV32IM-NEXT: sw t2, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s7, 16
-; RV32IM-NEXT: and t2, a0, s7
-; RV32IM-NEXT: sw t2, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s2, 32
-; RV32IM-NEXT: and t2, a0, s2
-; RV32IM-NEXT: sw t2, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s10, 64
-; RV32IM-NEXT: and t2, a0, s10
-; RV32IM-NEXT: sw t2, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t3, 128
-; RV32IM-NEXT: and t2, a0, t3
-; RV32IM-NEXT: sw t2, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t4, 256
-; RV32IM-NEXT: and t2, a0, t4
-; RV32IM-NEXT: sw t2, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t2, 512
-; RV32IM-NEXT: and s3, a0, t2
-; RV32IM-NEXT: sw s3, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a0, s8
-; RV32IM-NEXT: sw s3, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a0, s6
-; RV32IM-NEXT: sw s3, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a0, s4
-; RV32IM-NEXT: sw s3, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a0, s5
-; RV32IM-NEXT: sw s3, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a0, s9
-; RV32IM-NEXT: sw s3, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a0, s11
-; RV32IM-NEXT: sw s3, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a0, ra
-; RV32IM-NEXT: sw s3, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a0, t6
-; RV32IM-NEXT: sw s3, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t6, 262144
-; RV32IM-NEXT: and s3, a0, t6
-; RV32IM-NEXT: sw s3, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t6, 524288
-; RV32IM-NEXT: and a0, a0, t6
-; RV32IM-NEXT: lui s6, 524288
-; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 2
-; RV32IM-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 1
-; RV32IM-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 4
-; RV32IM-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 8
-; RV32IM-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 16
-; RV32IM-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 32
-; RV32IM-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 64
-; RV32IM-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 128
-; RV32IM-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 256
-; RV32IM-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 512
-; RV32IM-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a4, 1024
-; RV32IM-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s3, a4, t0
-; RV32IM-NEXT: sw s3, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and ra, a4, s0
-; RV32IM-NEXT: and s5, a4, s1
-; RV32IM-NEXT: and s9, a4, t1
-; RV32IM-NEXT: and s4, a4, t5
-; RV32IM-NEXT: and s8, a4, s7
-; RV32IM-NEXT: and s11, a4, s2
-; RV32IM-NEXT: and s10, a4, s10
-; RV32IM-NEXT: and s1, a4, t3
-; RV32IM-NEXT: and s2, a4, t4
-; RV32IM-NEXT: and s0, a4, t2
-; RV32IM-NEXT: lui a0, 1024
-; RV32IM-NEXT: and t6, a4, a0
-; RV32IM-NEXT: lui a0, 2048
-; RV32IM-NEXT: and t5, a4, a0
-; RV32IM-NEXT: lui a0, 4096
-; RV32IM-NEXT: and t3, a4, a0
-; RV32IM-NEXT: lui a0, 8192
-; RV32IM-NEXT: and t4, a4, a0
-; RV32IM-NEXT: lui a0, 16384
-; RV32IM-NEXT: and t2, a4, a0
-; RV32IM-NEXT: lui a0, 32768
-; RV32IM-NEXT: and t1, a4, a0
-; RV32IM-NEXT: lui a0, 65536
-; RV32IM-NEXT: and t0, a4, a0
-; RV32IM-NEXT: lui a0, 131072
-; RV32IM-NEXT: and a0, a4, a0
-; RV32IM-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 262144
-; RV32IM-NEXT: and s7, a4, a0
-; RV32IM-NEXT: and s6, a4, s6
-; RV32IM-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 324(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, a0
-; RV32IM-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s3
-; RV32IM-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, ra
-; RV32IM-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s5
-; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s9
-; RV32IM-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s4
-; RV32IM-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s8
-; RV32IM-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mv s3, s8
-; RV32IM-NEXT: mul a0, a1, s11
-; RV32IM-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s10
-; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s1
-; RV32IM-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s2
-; RV32IM-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s0
-; RV32IM-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t6
-; RV32IM-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t5
-; RV32IM-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t3
-; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t4
-; RV32IM-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t2
-; RV32IM-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t1
-; RV32IM-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, t0
-; RV32IM-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s8, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a1, s8
-; RV32IM-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s7
-; RV32IM-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a1, s6
-; RV32IM-NEXT: mv a1, s6
-; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 316(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 640(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
+; RV32IM-NEXT: lui s9, 2048
+; RV32IM-NEXT: lui a4, 131072
+; RV32IM-NEXT: lui t2, 262144
+; RV32IM-NEXT: lui t5, 524288
+; RV32IM-NEXT: andi t3, a1, 1
+; RV32IM-NEXT: andi a5, a1, 1024
+; RV32IM-NEXT: sw a5, 344(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t4, a1, a6
+; RV32IM-NEXT: and t6, a1, a7
+; RV32IM-NEXT: lui a5, 16
+; RV32IM-NEXT: and s0, a1, a5
+; RV32IM-NEXT: and s1, a1, t1
+; RV32IM-NEXT: and s5, a1, s6
+; RV32IM-NEXT: and s10, a1, s8
+; RV32IM-NEXT: and s11, a1, s9
+; RV32IM-NEXT: and ra, a1, a4
+; RV32IM-NEXT: and a4, a1, t2
+; RV32IM-NEXT: and a1, a1, t5
+; RV32IM-NEXT: seqz t3, t3
+; RV32IM-NEXT: mul t0, a2, t0
+; RV32IM-NEXT: sw t0, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t0, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul t0, a2, t0
+; RV32IM-NEXT: sw t0, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi t0, a3, 1
+; RV32IM-NEXT: seqz t0, t0
+; RV32IM-NEXT: and t5, a3, a6
+; RV32IM-NEXT: mul a6, a0, t5
+; RV32IM-NEXT: sw a6, 280(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t5, a3, a7
+; RV32IM-NEXT: mul a6, a0, t5
+; RV32IM-NEXT: sw a6, 328(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t5, a3, a5
+; RV32IM-NEXT: mul a5, a0, t5
+; RV32IM-NEXT: sw a5, 412(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t5, a3, t1
+; RV32IM-NEXT: mul a5, a0, t5
+; RV32IM-NEXT: sw a5, 504(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t5, a3, s6
+; RV32IM-NEXT: mul a5, a0, t5
+; RV32IM-NEXT: sw a5, 272(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t5, a3, s8
+; RV32IM-NEXT: mul a5, a0, t5
+; RV32IM-NEXT: sw a5, 316(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t5, a3, s9
+; RV32IM-NEXT: mul a5, a0, t5
+; RV32IM-NEXT: sw a5, 396(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 4096
+; RV32IM-NEXT: and t5, a3, a5
+; RV32IM-NEXT: mul a5, a0, t5
+; RV32IM-NEXT: sw a5, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 8192
+; RV32IM-NEXT: and t5, a3, a5
+; RV32IM-NEXT: mul a5, a0, t5
+; RV32IM-NEXT: sw a5, 512(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 131072
+; RV32IM-NEXT: and t5, a3, a5
+; RV32IM-NEXT: mul a5, a0, t5
+; RV32IM-NEXT: sw a5, 260(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t5, a3, t2
+; RV32IM-NEXT: mul a5, a0, t5
+; RV32IM-NEXT: sw a5, 300(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a5, 524288
+; RV32IM-NEXT: and t5, a3, a5
+; RV32IM-NEXT: mul a5, a0, t5
+; RV32IM-NEXT: sw a5, 364(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a5, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a5, a0, a5
+; RV32IM-NEXT: mul a6, a2, t4
+; RV32IM-NEXT: sw a6, 252(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a0, t4
+; RV32IM-NEXT: sw a6, 276(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a2, t6
+; RV32IM-NEXT: sw a6, 288(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a0, t6
+; RV32IM-NEXT: sw a6, 312(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a2, s0
+; RV32IM-NEXT: sw a6, 352(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a0, s0
+; RV32IM-NEXT: sw a6, 408(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a2, s1
+; RV32IM-NEXT: sw a6, 440(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a0, s1
+; RV32IM-NEXT: sw a6, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a6, 392(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a7, a0, a6
+; RV32IM-NEXT: mul a6, a2, s5
+; RV32IM-NEXT: sw a6, 244(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a0, s5
+; RV32IM-NEXT: sw a6, 264(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a2, s10
+; RV32IM-NEXT: sw a6, 284(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a0, s10
+; RV32IM-NEXT: sw a6, 308(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a2, s11
+; RV32IM-NEXT: sw a6, 336(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a0, s11
+; RV32IM-NEXT: sw a6, 392(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a6, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul t4, a0, a6
+; RV32IM-NEXT: lw a6, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a6, a0, a6
+; RV32IM-NEXT: sw a6, 256(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a6, 376(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a6, a0, a6
+; RV32IM-NEXT: sw a6, 296(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a2, ra
+; RV32IM-NEXT: sw a6, 236(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a0, ra
+; RV32IM-NEXT: sw a6, 376(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a6, a2, a4
+; RV32IM-NEXT: sw a6, 268(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, a0, a4
+; RV32IM-NEXT: sw a4, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a4, a2, a1
+; RV32IM-NEXT: sw a4, 324(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, a0, a1
+; RV32IM-NEXT: sw a1, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: addi t0, t0, -1
+; RV32IM-NEXT: slli a1, a2, 10
+; RV32IM-NEXT: and a2, t3, a2
+; RV32IM-NEXT: sw a2, 224(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, t0, a0
+; RV32IM-NEXT: sw a2, 228(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, t3, a0
+; RV32IM-NEXT: sw a2, 248(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a0, a0, 10
+; RV32IM-NEXT: andi a2, a3, 1024
+; RV32IM-NEXT: lw a3, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: seqz a2, a2
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: and a1, a3, a1
+; RV32IM-NEXT: sw a1, 292(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a2, a2, a0
+; RV32IM-NEXT: sw a2, 304(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a3, a0
; RV32IM-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 320(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s6, a2, a0
-; RV32IM-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a2, a0
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, ra
-; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s5
-; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s9
-; RV32IM-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s4
-; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s3
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s11
-; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s10
-; RV32IM-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s1
-; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s2
-; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s0
-; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t6
-; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t5
-; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t3
-; RV32IM-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t4
-; RV32IM-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t2
-; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t1
-; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, t0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s8
-; RV32IM-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, s7
-; RV32IM-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a2, a1
-; RV32IM-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli a0, a2, 24
-; RV32IM-NEXT: lw s10, 744(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a1, a2, s10
-; RV32IM-NEXT: slli a1, a1, 8
-; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a0, a4, 8
-; RV32IM-NEXT: and a0, a0, s10
-; RV32IM-NEXT: srli a1, a4, 24
-; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli a0, a4, 24
-; RV32IM-NEXT: and a1, a4, s10
-; RV32IM-NEXT: slli a1, a1, 8
-; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a5
+; RV32IM-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 324(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 332(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 328(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 368(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 316(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: xor a0, a0, a7
; RV32IM-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 364(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 352(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 344(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 336(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 320(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 312(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: xor a0, a0, t4
; RV32IM-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s6, a0
-; RV32IM-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 304(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 300(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: srli a0, a3, 8
-; RV32IM-NEXT: and a0, a0, s10
-; RV32IM-NEXT: srli a1, a3, 24
-; RV32IM-NEXT: or a0, a0, a1
-; RV32IM-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 2
-; RV32IM-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 1
-; RV32IM-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 4
-; RV32IM-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 8
-; RV32IM-NEXT: sw a0, 316(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 16
-; RV32IM-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 32
-; RV32IM-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 64
-; RV32IM-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 128
-; RV32IM-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 256
-; RV32IM-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 512
-; RV32IM-NEXT: sw a0, 288(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a0, a5, 1024
-; RV32IM-NEXT: sw a0, 280(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a1, a5, a0
-; RV32IM-NEXT: sw a1, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t6, 1
-; RV32IM-NEXT: and a1, a5, t6
-; RV32IM-NEXT: sw a1, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t5, 2
-; RV32IM-NEXT: and a1, a5, t5
-; RV32IM-NEXT: sw a1, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s0, 4
-; RV32IM-NEXT: and a1, a5, s0
-; RV32IM-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s5, 8
-; RV32IM-NEXT: and a1, a5, s5
-; RV32IM-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t3, 16
-; RV32IM-NEXT: and a1, a5, t3
-; RV32IM-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s6, 32
-; RV32IM-NEXT: and a1, a5, s6
-; RV32IM-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s7, 64
-; RV32IM-NEXT: and a1, a5, s7
-; RV32IM-NEXT: sw a1, 152(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s3, 128
-; RV32IM-NEXT: and a1, a5, s3
-; RV32IM-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t4, 256
-; RV32IM-NEXT: and a1, a5, t4
-; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 512
-; RV32IM-NEXT: and a1, a5, a1
-; RV32IM-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui t2, 1024
-; RV32IM-NEXT: and a1, a5, t2
-; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s2, 2048
-; RV32IM-NEXT: and a1, a5, s2
-; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui s1, 4096
-; RV32IM-NEXT: and a1, a5, s1
-; RV32IM-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 8192
-; RV32IM-NEXT: and a1, a5, a1
-; RV32IM-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 16384
-; RV32IM-NEXT: and a1, a5, a1
-; RV32IM-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 32768
-; RV32IM-NEXT: and a1, a5, a1
-; RV32IM-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 65536
-; RV32IM-NEXT: and a1, a5, a1
-; RV32IM-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 131072
-; RV32IM-NEXT: and a1, a5, a1
-; RV32IM-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 262144
-; RV32IM-NEXT: and a1, a5, a1
-; RV32IM-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 524288
-; RV32IM-NEXT: and a1, a5, a1
-; RV32IM-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a4, a7, 2
-; RV32IM-NEXT: sw a4, 32(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a2, a7, 1
-; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a1, a7, 4
-; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a5, a7, 8
-; RV32IM-NEXT: sw a5, 16(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t0, a7, 16
-; RV32IM-NEXT: sw t0, 20(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a7, 32
-; RV32IM-NEXT: sw t1, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t1, a7, 64
-; RV32IM-NEXT: sw t1, 24(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi ra, a7, 128
-; RV32IM-NEXT: andi s9, a7, 256
-; RV32IM-NEXT: andi s11, a7, 512
-; RV32IM-NEXT: andi s4, a7, 1024
-; RV32IM-NEXT: and s10, a7, a0
-; RV32IM-NEXT: and s8, a7, t6
-; RV32IM-NEXT: sw s8, 36(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, a7, t5
-; RV32IM-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, a7, s0
-; RV32IM-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, a7, s5
-; RV32IM-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and a0, a7, t3
-; RV32IM-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s5, a7, s6
-; RV32IM-NEXT: and s6, a7, s7
-; RV32IM-NEXT: and s7, a7, s3
-; RV32IM-NEXT: and a0, a7, t4
-; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 512
-; RV32IM-NEXT: and t3, a7, a0
-; RV32IM-NEXT: and t2, a7, t2
-; RV32IM-NEXT: and s0, a7, s2
-; RV32IM-NEXT: and s2, a7, s1
-; RV32IM-NEXT: lui a0, 8192
-; RV32IM-NEXT: and s3, a7, a0
-; RV32IM-NEXT: lui a0, 16384
-; RV32IM-NEXT: and t6, a7, a0
-; RV32IM-NEXT: lui a0, 32768
-; RV32IM-NEXT: and t4, a7, a0
-; RV32IM-NEXT: lui a0, 65536
-; RV32IM-NEXT: and t5, a7, a0
-; RV32IM-NEXT: lui a0, 131072
-; RV32IM-NEXT: and s1, a7, a0
-; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 262144
-; RV32IM-NEXT: and a0, a7, a0
-; RV32IM-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 524288
-; RV32IM-NEXT: and a0, a7, a0
-; RV32IM-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, a4
-; RV32IM-NEXT: sw a0, 204(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, a2
-; RV32IM-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, a1
-; RV32IM-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, a5
-; RV32IM-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, t0
-; RV32IM-NEXT: sw a0, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a1
-; RV32IM-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, t1
-; RV32IM-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, ra
+; RV32IM-NEXT: andi a0, s7, 2
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s3, 2
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, s4, 1
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 200(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, s2, 1
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
; RV32IM-NEXT: sw a0, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s9
-; RV32IM-NEXT: sw a0, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s11
-; RV32IM-NEXT: sw a0, 136(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s4
-; RV32IM-NEXT: mv a5, s4
-; RV32IM-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s10
-; RV32IM-NEXT: sw a0, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s8
+; RV32IM-NEXT: andi a0, s7, 4
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s3, 4
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, s4, 2
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 188(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, s2, 2
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
; RV32IM-NEXT: sw a0, 216(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 212(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s5
-; RV32IM-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s6
-; RV32IM-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s7
-; RV32IM-NEXT: mv a2, s7
+; RV32IM-NEXT: andi a0, s7, 8
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s3, 8
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, s4, 3
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 156(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, s2, 3
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 168(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, s7, 16
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s3, 16
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, s4, 4
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 136(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, s2, 4
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 148(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
; RV32IM-NEXT: sw a0, 176(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s7, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, s7
-; RV32IM-NEXT: sw a0, 324(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, t3
-; RV32IM-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, t2
-; RV32IM-NEXT: sw a0, 188(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s0
-; RV32IM-NEXT: mv t1, s0
-; RV32IM-NEXT: sw a0, 180(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s2
-; RV32IM-NEXT: sw a0, 292(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s3
-; RV32IM-NEXT: sw a0, 164(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, t6
-; RV32IM-NEXT: mv t0, t6
-; RV32IM-NEXT: sw a0, 320(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, t4
-; RV32IM-NEXT: mv a4, t4
-; RV32IM-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, t5
-; RV32IM-NEXT: mv s8, t5
-; RV32IM-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s1
-; RV32IM-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 172(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 344(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 168(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 160(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 332(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 156(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 316(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 312(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 140(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 304(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 276(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 300(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 124(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a6, a3, a0
-; RV32IM-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 120(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 268(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 224(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 316(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 200(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 108(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 264(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t6, a3, a0
-; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s0, a3, a0
-; RV32IM-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s1, a3, a0
-; RV32IM-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s4, a3, a0
-; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a3, a1
-; RV32IM-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t4, a3, a0
-; RV32IM-NEXT: mul a0, a3, ra
-; RV32IM-NEXT: sw a0, 288(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s9, a3, s9
-; RV32IM-NEXT: mul a0, a3, s11
-; RV32IM-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a3, a5
-; RV32IM-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a3, s10
-; RV32IM-NEXT: sw a0, 224(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 280(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s10, a3, a0
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t5, a3, a0
-; RV32IM-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 200(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a3, s5
-; RV32IM-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a3, s6
-; RV32IM-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s5, a3, a2
-; RV32IM-NEXT: mul s6, a3, s7
-; RV32IM-NEXT: mul a2, a3, t3
-; RV32IM-NEXT: mul a0, a3, t2
+; RV32IM-NEXT: andi a0, s7, 32
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s3, 32
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, s4, 5
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 124(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, s2, 5
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
; RV32IM-NEXT: sw a0, 152(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a3, t1
-; RV32IM-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t1, a3, s2
-; RV32IM-NEXT: mul t2, a3, s3
-; RV32IM-NEXT: mul a5, a3, t0
-; RV32IM-NEXT: mul a0, a3, a4
-; RV32IM-NEXT: sw a0, 88(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a3, s8
-; RV32IM-NEXT: sw a0, 192(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a3, a0
-; RV32IM-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli s11, a3, 24
-; RV32IM-NEXT: lw a0, 744(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a3, a3, a0
-; RV32IM-NEXT: slli a3, a3, 8
-; RV32IM-NEXT: or a3, s11, a3
-; RV32IM-NEXT: srli s11, a7, 8
-; RV32IM-NEXT: and s11, s11, a0
-; RV32IM-NEXT: srli ra, a7, 24
-; RV32IM-NEXT: or a4, s11, ra
-; RV32IM-NEXT: slli ra, a7, 24
-; RV32IM-NEXT: and a7, a7, a0
-; RV32IM-NEXT: slli a7, a7, 8
-; RV32IM-NEXT: or a7, ra, a7
-; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, a0, a1
-; RV32IM-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, a0, a1
-; RV32IM-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, a0, a1
-; RV32IM-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, a0, a1
-; RV32IM-NEXT: lw a0, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, a0, a1
-; RV32IM-NEXT: lw a0, 212(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, a0, a1
-; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 180(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor ra, a0, a1
-; RV32IM-NEXT: lw a0, 172(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: andi a0, s7, 64
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s3, 64
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, s4, 6
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 172(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, s2, 6
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 204(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, s7, 128
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s3, 128
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, s4, 7
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 88(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, s2, 7
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, s7, 256
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s3, 256
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, s4, 8
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, s2, 8
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, s7, 512
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: andi a1, s3, 512
+; RV32IM-NEXT: seqz a1, a1
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: slli a2, s4, 9
+; RV32IM-NEXT: and a2, a0, a2
+; RV32IM-NEXT: sw a2, 108(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a2, s2, 9
+; RV32IM-NEXT: and a1, a1, a2
+; RV32IM-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a0, a0, a2
+; RV32IM-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s7, 1
+; RV32IM-NEXT: andi t6, s7, 1024
+; RV32IM-NEXT: lui t0, 4
+; RV32IM-NEXT: and a5, s7, t0
+; RV32IM-NEXT: lui s9, 8
+; RV32IM-NEXT: and a6, s7, s9
+; RV32IM-NEXT: lui a7, 16
+; RV32IM-NEXT: and t1, s7, a7
+; RV32IM-NEXT: lui s8, 32
+; RV32IM-NEXT: and t5, s7, s8
+; RV32IM-NEXT: lui t4, 512
+; RV32IM-NEXT: and s1, s7, t4
+; RV32IM-NEXT: lui s0, 1024
+; RV32IM-NEXT: and s5, s7, s0
+; RV32IM-NEXT: lui s6, 2048
+; RV32IM-NEXT: and a0, s7, s6
+; RV32IM-NEXT: lui s10, 131072
+; RV32IM-NEXT: and a1, s7, s10
+; RV32IM-NEXT: lui s11, 262144
+; RV32IM-NEXT: and a3, s7, s11
+; RV32IM-NEXT: lui ra, 524288
+; RV32IM-NEXT: and a4, s7, ra
+; RV32IM-NEXT: seqz t3, a2
+; RV32IM-NEXT: lw a2, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a2, s4, a2
+; RV32IM-NEXT: sw a2, 428(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a2, s4, a2
+; RV32IM-NEXT: sw a2, 452(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a2, s3, 1
+; RV32IM-NEXT: seqz t2, a2
+; RV32IM-NEXT: and t0, s3, t0
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, s3, s9
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 84(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, s3, a7
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 180(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, s3, s8
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 220(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, s3, t4
+; RV32IM-NEXT: mul t4, s2, t0
+; RV32IM-NEXT: and t0, s3, s0
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 72(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, s3, s6
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 160(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a2, 4096
+; RV32IM-NEXT: and t0, s3, a2
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 208(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a2, 8192
+; RV32IM-NEXT: and t0, s3, a2
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 424(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, s3, s10
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, s3, s11
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 56(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t0, s3, ra
+; RV32IM-NEXT: mul a2, s2, t0
+; RV32IM-NEXT: sw a2, 140(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a7, s2, a2
+; RV32IM-NEXT: mul s10, s4, a5
+; RV32IM-NEXT: mul ra, s2, a5
+; RV32IM-NEXT: mul a2, s4, a6
+; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s2, a6
+; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s4, t1
+; RV32IM-NEXT: sw a2, 120(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s2, t1
+; RV32IM-NEXT: sw a2, 144(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s4, t5
+; RV32IM-NEXT: sw a2, 164(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a2, s2, t5
+; RV32IM-NEXT: sw a2, 420(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a2, s2, a2
+; RV32IM-NEXT: mul s9, s4, s1
+; RV32IM-NEXT: mul a5, s2, s1
+; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s4, s5
+; RV32IM-NEXT: sw a5, 32(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s2, s5
+; RV32IM-NEXT: sw a5, 48(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a5, s4, a0
+; RV32IM-NEXT: sw a5, 104(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a0, s2, a0
+; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a0, s2, a0
+; RV32IM-NEXT: lw a5, 620(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a5, s2, a5
+; RV32IM-NEXT: sw a5, 16(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a5, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: mul a5, s2, a5
+; RV32IM-NEXT: sw a5, 44(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul s7, s4, a1
+; RV32IM-NEXT: mul a1, s2, a1
+; RV32IM-NEXT: sw a1, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, s4, a3
+; RV32IM-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, s2, a3
+; RV32IM-NEXT: sw a1, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, s4, a4
+; RV32IM-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, s2, a4
+; RV32IM-NEXT: sw a1, 620(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi t3, t3, -1
+; RV32IM-NEXT: addi t2, t2, -1
+; RV32IM-NEXT: slli a3, s4, 10
+; RV32IM-NEXT: and a4, t3, s4
+; RV32IM-NEXT: and t2, t2, s2
+; RV32IM-NEXT: and a5, t3, s2
+; RV32IM-NEXT: slli s2, s2, 10
+; RV32IM-NEXT: andi s3, s3, 1024
+; RV32IM-NEXT: seqz t6, t6
+; RV32IM-NEXT: seqz s3, s3
+; RV32IM-NEXT: addi t6, t6, -1
+; RV32IM-NEXT: addi s3, s3, -1
+; RV32IM-NEXT: and a1, t6, a3
+; RV32IM-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a1, s3, s2
+; RV32IM-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a1, t6, s2
+; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a7
+; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: sw a1, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 160(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, a1, a0
-; RV32IM-NEXT: lw a0, 156(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 132(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 244(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 120(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 232(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 228(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 220(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 216(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 320(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor t6, s0, t6
-; RV32IM-NEXT: xor s0, s1, s4
-; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, a0, a1
-; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s4, s9, a0
-; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s10, a0
-; RV32IM-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor s5, s5, s6
-; RV32IM-NEXT: xor t1, t1, t2
-; RV32IM-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, a0, a1
-; RV32IM-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, a1, a0
-; RV32IM-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 332(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 280(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a1, 272(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
+; RV32IM-NEXT: lw a1, 260(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 200(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a4, a0
+; RV32IM-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 156(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, a0, a1
+; RV32IM-NEXT: lw a0, 136(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, a0, a1
+; RV32IM-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 68(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 356(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor s10, a0, s10
; RV32IM-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 352(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 348(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: or a0, a3, a0
-; RV32IM-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: or a0, a7, a4
-; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a3, t0, s2
+; RV32IM-NEXT: xor s9, a0, s9
+; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, a0, s7
+; RV32IM-NEXT: lw a0, 212(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s8, t2, a0
+; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 168(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, a0, a1
; RV32IM-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, s3, a0
-; RV32IM-NEXT: lw a0, 136(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, s7, a0
-; RV32IM-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, s8, a0
+; RV32IM-NEXT: lw s0, 128(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, a0, s0
; RV32IM-NEXT: lw a0, 96(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, s11, a0
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, ra, a0
-; RV32IM-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 256(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, t3, a0
-; RV32IM-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s1, 76(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, a0, s1
+; RV32IM-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s2, a0, a1
+; RV32IM-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, a0, t4
+; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, a0, a1
+; RV32IM-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t5, a1, a0
+; RV32IM-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a0, a1
+; RV32IM-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, a0, a1
+; RV32IM-NEXT: lw a0, 384(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, a0, a1
; RV32IM-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a1, a0
; RV32IM-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 240(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a1, a0
+; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 356(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, a1, a0
+; RV32IM-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a5, a0
+; RV32IM-NEXT: lw a0, 216(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 464(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, t6, s0
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor s9, s1, t4
-; RV32IM-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, s4, a0
-; RV32IM-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, a0, t5
-; RV32IM-NEXT: xor ra, s5, a2
-; RV32IM-NEXT: xor s5, t1, a5
-; RV32IM-NEXT: xor a0, t2, s6
-; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 448(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a1, a0
-; RV32IM-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 444(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, a1, a0
-; RV32IM-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 176(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 152(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, a2
+; RV32IM-NEXT: lw a2, 112(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 100(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a3
+; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, ra
+; RV32IM-NEXT: lw a4, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a5, a4
+; RV32IM-NEXT: lw a5, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw ra, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, ra, a5
+; RV32IM-NEXT: lw ra, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, ra, s5
+; RV32IM-NEXT: sw ra, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 468(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 368(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 436(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 288(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 284(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 268(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 436(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, ra, s5
+; RV32IM-NEXT: sw ra, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 388(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 416(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 328(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 372(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 404(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 316(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: sw ra, 400(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw ra, 300(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor ra, s5, ra
+; RV32IM-NEXT: lw s5, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, s5, s6
+; RV32IM-NEXT: sw s6, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s6, 172(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, s11, s6
+; RV32IM-NEXT: sw s6, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s6, 108(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 348(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s5, s6
+; RV32IM-NEXT: sw s5, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s5, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s10, s5
+; RV32IM-NEXT: sw s5, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s5, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s9, s5
+; RV32IM-NEXT: sw s5, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw s5, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, s7, s5
+; RV32IM-NEXT: sw s5, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor t6, s8, t6
+; RV32IM-NEXT: sw t6, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 184(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s0, t6
+; RV32IM-NEXT: sw t6, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 116(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s1, t6
+; RV32IM-NEXT: sw t6, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 84(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s2, t6
+; RV32IM-NEXT: sw t6, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 72(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s3, t6
+; RV32IM-NEXT: sw t6, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t6, 56(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t6, s4, t6
+; RV32IM-NEXT: sw t6, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a7, t5, a7
+; RV32IM-NEXT: sw a7, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, t0, a7
+; RV32IM-NEXT: lw a7, 432(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t1, a7
+; RV32IM-NEXT: sw a7, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 312(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t2, a7
+; RV32IM-NEXT: sw a7, 516(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 308(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t3, a7
+; RV32IM-NEXT: sw a7, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a7, 296(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t4, a7
+; RV32IM-NEXT: sw a7, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a6, a0
+; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 204(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor s8, a1, a0
-; RV32IM-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s0, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, s0, a0
-; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s1, a0
-; RV32IM-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, a1, a0
-; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, a1, a0
-; RV32IM-NEXT: xor s4, a3, a4
-; RV32IM-NEXT: lw a0, 196(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, a7, a0
-; RV32IM-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t0, a0
-; RV32IM-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, a1, a0
-; RV32IM-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, a1, a0
-; RV32IM-NEXT: lw a0, 420(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 416(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, a0, a1
-; RV32IM-NEXT: lw a0, 412(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a6
-; RV32IM-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 408(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: lw a2, 300(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a3, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s9, a2, a0
+; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a3, a0
+; RV32IM-NEXT: lw a1, 48(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a4, a1
+; RV32IM-NEXT: sw a1, 496(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a5, a1
+; RV32IM-NEXT: sw a1, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a3, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a2, a3, 4
+; RV32IM-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a3, a3, a1
+; RV32IM-NEXT: and a2, a2, a1
+; RV32IM-NEXT: slli a3, a3, 4
+; RV32IM-NEXT: or s10, a2, a3
+; RV32IM-NEXT: lw a2, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a3, a2, 4
+; RV32IM-NEXT: and t1, a2, a1
+; RV32IM-NEXT: and a3, a3, a1
+; RV32IM-NEXT: slli t1, t1, 4
+; RV32IM-NEXT: or a2, a3, t1
+; RV32IM-NEXT: sw a2, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 484(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 468(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s7, a2, a3
+; RV32IM-NEXT: lw a2, 292(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s5, a3, a2
+; RV32IM-NEXT: lw a2, 352(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, a3, a2
+; RV32IM-NEXT: lw a2, 336(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, a3, a2
+; RV32IM-NEXT: lw a2, 324(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 436(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: lw a3, 296(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 400(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a4, a3
-; RV32IM-NEXT: lw a4, 484(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a5, 480(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: lw a5, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a6, 508(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a6, a5
-; RV32IM-NEXT: lw a6, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a7, 500(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a7, a6
-; RV32IM-NEXT: lw a7, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw t0, 496(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, t0, a7
-; RV32IM-NEXT: lw t0, 504(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw t1, 476(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t1, t0
-; RV32IM-NEXT: lw t1, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s9, t1, s9
-; RV32IM-NEXT: lw t1, 224(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, s10, t1
-; RV32IM-NEXT: lw t1, 200(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s11, s11, t1
-; RV32IM-NEXT: lw t1, 152(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor ra, ra, t1
-; RV32IM-NEXT: lw t1, 88(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, s5, t1
-; RV32IM-NEXT: lw s5, 472(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s5, s5, s6
-; RV32IM-NEXT: sw s5, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s5, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 468(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, s6, s5
-; RV32IM-NEXT: lw s5, 644(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, s7, s5
-; RV32IM-NEXT: lw s5, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s8, s8, s5
-; RV32IM-NEXT: lw s5, 628(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s0, s5, s0
-; RV32IM-NEXT: lw s5, 624(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s1, s5
-; RV32IM-NEXT: lw s5, 616(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s2, s2, s5
-; RV32IM-NEXT: lw s5, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s3, s3, s5
-; RV32IM-NEXT: xor t2, s4, t2
-; RV32IM-NEXT: lw s4, 328(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s4, t3, s4
-; RV32IM-NEXT: lw t3, 324(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t4, t3
-; RV32IM-NEXT: lw t3, 320(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, t5, t3
-; RV32IM-NEXT: xor t6, t6, a0
-; RV32IM-NEXT: lw a0, 316(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, a1, a0
-; RV32IM-NEXT: lw a0, 312(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a0
-; RV32IM-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a0
-; RV32IM-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a0
-; RV32IM-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a0
-; RV32IM-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, a0
-; RV32IM-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a7, a0
-; RV32IM-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, a0
-; RV32IM-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, s9, a0
-; RV32IM-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s9, s10, a0
-; RV32IM-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s10, s11, a0
-; RV32IM-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, ra, a0
-; RV32IM-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, a0
-; RV32IM-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s11, a0, 4
-; RV32IM-NEXT: lw a1, 732(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and ra, a0, a1
-; RV32IM-NEXT: and s11, s11, a1
+; RV32IM-NEXT: sw a2, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, a2, a3
+; RV32IM-NEXT: lw a2, 304(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 416(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s2, a3, a2
+; RV32IM-NEXT: lw a2, 412(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 404(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s0, a2
+; RV32IM-NEXT: lw a2, 396(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 400(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s1, a2
+; RV32IM-NEXT: lw a2, 364(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, ra, a2
+; RV32IM-NEXT: sw a2, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a2, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t1, a2, 4
+; RV32IM-NEXT: and t3, a2, a1
+; RV32IM-NEXT: and t1, t1, a1
+; RV32IM-NEXT: slli t3, t3, 4
+; RV32IM-NEXT: or t6, t1, t3
+; RV32IM-NEXT: lw a2, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t3, a2, 4
+; RV32IM-NEXT: and ra, a2, a1
+; RV32IM-NEXT: and t3, t3, a1
; RV32IM-NEXT: slli ra, ra, 4
-; RV32IM-NEXT: or s11, s11, ra
-; RV32IM-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli ra, a0, 4
-; RV32IM-NEXT: and a0, a0, a1
-; RV32IM-NEXT: and ra, ra, a1
-; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: or a0, ra, a0
-; RV32IM-NEXT: lw s5, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s5, s5, s6
-; RV32IM-NEXT: lw s6, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s6, s7, s6
-; RV32IM-NEXT: lw s7, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s7, s8, s7
-; RV32IM-NEXT: xor s0, s0, s1
-; RV32IM-NEXT: lw s1, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s2, s1
-; RV32IM-NEXT: lw s2, 640(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: or t5, t3, ra
+; RV32IM-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 576(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t4, a1, a2
+; RV32IM-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, a2, a1
+; RV32IM-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a2, a1
+; RV32IM-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a2, a1
+; RV32IM-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: sw a1, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, a1, a2
+; RV32IM-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a2, a1
+; RV32IM-NEXT: lw a1, 180(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a2, a1
+; RV32IM-NEXT: lw a1, 160(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a1
+; RV32IM-NEXT: lw a1, 140(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: sw a1, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s11, a1, s11
+; RV32IM-NEXT: lw a1, 344(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a4, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a1
+; RV32IM-NEXT: lw a1, 408(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a3, 516(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a1
+; RV32IM-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a2, a1
+; RV32IM-NEXT: sw a1, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a2, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a1
+; RV32IM-NEXT: lw a1, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s8, a1, s8
+; RV32IM-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s9, s9, a1
+; RV32IM-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a0, a1
+; RV32IM-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw ra, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, ra, a0
+; RV32IM-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw ra, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, ra, a0
+; RV32IM-NEXT: xor s5, s7, s5
+; RV32IM-NEXT: lw s7, 440(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s6, s6, s7
+; RV32IM-NEXT: lw s7, 520(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s4, s4, s7
; RV32IM-NEXT: xor s2, s3, s2
-; RV32IM-NEXT: lw s8, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s3, s8, 4
-; RV32IM-NEXT: and s8, s8, a1
-; RV32IM-NEXT: and s3, s3, a1
-; RV32IM-NEXT: slli s8, s8, 4
-; RV32IM-NEXT: or s3, s3, s8
-; RV32IM-NEXT: lw ra, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli s8, ra, 4
-; RV32IM-NEXT: and ra, ra, a1
-; RV32IM-NEXT: and s8, s8, a1
-; RV32IM-NEXT: slli ra, ra, 4
-; RV32IM-NEXT: or s8, s8, ra
-; RV32IM-NEXT: xor t2, t2, s4
-; RV32IM-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t4, a1
-; RV32IM-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t5, t5, a1
-; RV32IM-NEXT: xor a1, t6, t3
-; RV32IM-NEXT: lw t3, 336(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t3
-; RV32IM-NEXT: lw t3, 332(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t3
-; RV32IM-NEXT: lw t3, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, t3
-; RV32IM-NEXT: lw t3, 684(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, t3
-; RV32IM-NEXT: lw t3, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t6, s10, t3
-; RV32IM-NEXT: lw t3, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, t3
-; RV32IM-NEXT: xor s4, s5, s6
-; RV32IM-NEXT: lw t3, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s5, s7, t3
-; RV32IM-NEXT: xor s0, s0, s1
-; RV32IM-NEXT: lw t3, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor s1, s2, t3
+; RV32IM-NEXT: lw s3, 504(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s0, s0, s3
+; RV32IM-NEXT: lw s3, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s1, s3
+; RV32IM-NEXT: xor t1, t4, t1
+; RV32IM-NEXT: lw t4, 164(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor t2, t2, t4
-; RV32IM-NEXT: lw t3, 440(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t4, t5, t3
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 344(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: mv s2, a4
-; RV32IM-NEXT: xor a3, a4, a5
-; RV32IM-NEXT: xor a3, a3, a6
-; RV32IM-NEXT: lw a5, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, t0, a5
-; RV32IM-NEXT: xor a6, a7, s9
-; RV32IM-NEXT: xor a6, a6, t6
-; RV32IM-NEXT: lw t0, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t1, t0
-; RV32IM-NEXT: xor t1, s4, s5
+; RV32IM-NEXT: lw t4, 428(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, t3, t4
+; RV32IM-NEXT: xor t0, t0, a6
+; RV32IM-NEXT: lw a6, 220(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, a6
+; RV32IM-NEXT: lw a6, 208(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: xor a6, s11, a4
+; RV32IM-NEXT: lw a4, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: lw a4, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a4
+; RV32IM-NEXT: xor a4, s8, s9
+; RV32IM-NEXT: lw t4, 420(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, t4
+; RV32IM-NEXT: lw t4, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a0, t4
+; RV32IM-NEXT: xor t4, s5, s6
+; RV32IM-NEXT: lw s3, 528(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s3, s4, s3
+; RV32IM-NEXT: xor s0, s2, s0
+; RV32IM-NEXT: lw s2, 512(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s1, s1, s2
+; RV32IM-NEXT: xor t1, t1, t2
+; RV32IM-NEXT: lw t2, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, t3, t2
+; RV32IM-NEXT: xor t0, t0, a7
+; RV32IM-NEXT: lw a7, 424(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a5, a7
+; RV32IM-NEXT: lw a5, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a2, a5
+; RV32IM-NEXT: lw a5, 620(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a0, a5
+; RV32IM-NEXT: srli a0, s10, 2
+; RV32IM-NEXT: lw s4, 632(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a2, s10, s4
+; RV32IM-NEXT: and a0, a0, s4
+; RV32IM-NEXT: slli a2, a2, 2
+; RV32IM-NEXT: or a0, a0, a2
+; RV32IM-NEXT: lw s2, 588(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli a2, s2, 2
+; RV32IM-NEXT: and s2, s2, s4
+; RV32IM-NEXT: and a2, a2, s4
+; RV32IM-NEXT: slli s2, s2, 2
+; RV32IM-NEXT: or a2, a2, s2
+; RV32IM-NEXT: xor t4, t4, s3
; RV32IM-NEXT: xor s0, s0, s1
-; RV32IM-NEXT: xor t2, t2, t4
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: lw a3, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a5, a3
-; RV32IM-NEXT: lw a4, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a6, a4
-; RV32IM-NEXT: lw a5, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, t0, a5
-; RV32IM-NEXT: srli a6, s11, 2
-; RV32IM-NEXT: lw t3, 736(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and t0, s11, t3
-; RV32IM-NEXT: and a6, a6, t3
-; RV32IM-NEXT: slli t0, t0, 2
-; RV32IM-NEXT: or a6, a6, t0
-; RV32IM-NEXT: srli t0, a0, 2
-; RV32IM-NEXT: and a0, a0, t3
-; RV32IM-NEXT: and t0, t0, t3
-; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: or t0, t0, a0
-; RV32IM-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, t1, a0
-; RV32IM-NEXT: lw t1, 584(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, s0, t1
-; RV32IM-NEXT: srli t4, s3, 2
-; RV32IM-NEXT: and t5, s3, t3
-; RV32IM-NEXT: and t4, t4, t3
-; RV32IM-NEXT: slli t5, t5, 2
-; RV32IM-NEXT: or t4, t4, t5
-; RV32IM-NEXT: srli t5, s8, 2
-; RV32IM-NEXT: and t6, s8, t3
-; RV32IM-NEXT: and t5, t5, t3
+; RV32IM-NEXT: srli s1, t6, 2
+; RV32IM-NEXT: and t6, t6, s4
+; RV32IM-NEXT: and s1, s1, s4
; RV32IM-NEXT: slli t6, t6, 2
-; RV32IM-NEXT: or t5, t5, t6
-; RV32IM-NEXT: lw t3, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t2, t2, t3
-; RV32IM-NEXT: lw t3, 580(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, t3
-; RV32IM-NEXT: xor a0, t1, a0
-; RV32IM-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: xor a0, a1, t2
-; RV32IM-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: slli t3, s2, 24
-; RV32IM-NEXT: lw t1, 744(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a0, a2, t1
-; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or a1, t3, a0
-; RV32IM-NEXT: xor a3, a2, a3
-; RV32IM-NEXT: srli a2, a2, 8
-; RV32IM-NEXT: and a0, a2, t1
-; RV32IM-NEXT: srli a3, a3, 24
-; RV32IM-NEXT: or a3, a0, a3
-; RV32IM-NEXT: slli a7, a7, 24
-; RV32IM-NEXT: and a0, a4, t1
-; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or a2, a7, a0
-; RV32IM-NEXT: xor a5, a4, a5
-; RV32IM-NEXT: srli a4, a4, 8
-; RV32IM-NEXT: and a0, a4, t1
+; RV32IM-NEXT: or t6, s1, t6
+; RV32IM-NEXT: srli s1, t5, 2
+; RV32IM-NEXT: and t5, t5, s4
+; RV32IM-NEXT: and s1, s1, s4
+; RV32IM-NEXT: mv s3, s4
+; RV32IM-NEXT: slli t5, t5, 2
+; RV32IM-NEXT: or t5, s1, t5
+; RV32IM-NEXT: xor t1, t1, t2
+; RV32IM-NEXT: xor t0, t0, t3
+; RV32IM-NEXT: xor a3, a6, a3
+; RV32IM-NEXT: lw t2, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a3, t2
+; RV32IM-NEXT: xor a1, a4, a1
+; RV32IM-NEXT: lw a3, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a1, a3
+; RV32IM-NEXT: lw a1, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, t4, a1
+; RV32IM-NEXT: lw a3, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, s0, a3
+; RV32IM-NEXT: lw t4, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t1, t1, t4
+; RV32IM-NEXT: lw t4, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, t4
+; RV32IM-NEXT: xor a1, a3, a1
+; RV32IM-NEXT: sw a1, 620(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a1, t0, t1
+; RV32IM-NEXT: sw a1, 616(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: srli a1, a0, 1
+; RV32IM-NEXT: lw s4, 628(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a0, a0, s4
+; RV32IM-NEXT: and a1, a1, s4
+; RV32IM-NEXT: slli a0, a0, 1
+; RV32IM-NEXT: or a1, a1, a0
+; RV32IM-NEXT: srli a0, a2, 1
+; RV32IM-NEXT: and a2, a2, s4
+; RV32IM-NEXT: and a0, a0, s4
+; RV32IM-NEXT: slli a2, a2, 1
+; RV32IM-NEXT: or a0, a0, a2
+; RV32IM-NEXT: srli a2, t6, 1
+; RV32IM-NEXT: and a3, t6, s4
+; RV32IM-NEXT: and a2, a2, s4
+; RV32IM-NEXT: slli a3, a3, 1
+; RV32IM-NEXT: or a2, a2, a3
+; RV32IM-NEXT: srli a3, t5, 1
+; RV32IM-NEXT: and t0, t5, s4
+; RV32IM-NEXT: and a3, a3, s4
+; RV32IM-NEXT: slli t0, t0, 1
+; RV32IM-NEXT: or a3, a3, t0
+; RV32IM-NEXT: slli a6, a6, 24
+; RV32IM-NEXT: lw t1, 644(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and t0, t2, t1
+; RV32IM-NEXT: slli t0, t0, 8
+; RV32IM-NEXT: or a6, a6, t0
+; RV32IM-NEXT: xor a7, t2, a7
+; RV32IM-NEXT: srli t0, t2, 8
+; RV32IM-NEXT: and t0, t0, t1
+; RV32IM-NEXT: srli a7, a7, 24
+; RV32IM-NEXT: or a7, t0, a7
+; RV32IM-NEXT: slli a4, a4, 24
+; RV32IM-NEXT: and t0, t3, t1
+; RV32IM-NEXT: slli t0, t0, 8
+; RV32IM-NEXT: or a4, a4, t0
+; RV32IM-NEXT: xor a5, t3, a5
+; RV32IM-NEXT: srli t0, t3, 8
+; RV32IM-NEXT: and t0, t0, t1
; RV32IM-NEXT: srli a5, a5, 24
-; RV32IM-NEXT: or a5, a0, a5
-; RV32IM-NEXT: srli a0, a6, 1
-; RV32IM-NEXT: lw t1, 740(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a4, a6, t1
-; RV32IM-NEXT: and a0, a0, t1
-; RV32IM-NEXT: slli a4, a4, 1
-; RV32IM-NEXT: or a0, a0, a4
-; RV32IM-NEXT: srli a4, t0, 1
-; RV32IM-NEXT: and a6, t0, t1
-; RV32IM-NEXT: and a4, a4, t1
-; RV32IM-NEXT: slli a6, a6, 1
-; RV32IM-NEXT: or a4, a4, a6
-; RV32IM-NEXT: srli a6, t4, 1
-; RV32IM-NEXT: and a7, t4, t1
-; RV32IM-NEXT: and a6, a6, t1
-; RV32IM-NEXT: slli a7, a7, 1
+; RV32IM-NEXT: or a5, t0, a5
; RV32IM-NEXT: or a6, a6, a7
-; RV32IM-NEXT: srli a7, t5, 1
-; RV32IM-NEXT: and t0, t5, t1
-; RV32IM-NEXT: and a7, a7, t1
-; RV32IM-NEXT: slli t0, t0, 1
-; RV32IM-NEXT: or s11, a7, t0
-; RV32IM-NEXT: or a1, a1, a3
-; RV32IM-NEXT: sw a1, 704(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: or a2, a2, a5
-; RV32IM-NEXT: sw a2, 716(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a1, a4, 2
-; RV32IM-NEXT: sw a1, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a1, a4, 1
-; RV32IM-NEXT: sw a1, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi a3, a4, 4
-; RV32IM-NEXT: andi a5, a4, 8
-; RV32IM-NEXT: andi a7, a4, 16
-; RV32IM-NEXT: andi t0, a4, 32
-; RV32IM-NEXT: andi t1, a4, 64
-; RV32IM-NEXT: andi t2, a4, 128
-; RV32IM-NEXT: andi t3, a4, 256
-; RV32IM-NEXT: andi t4, a4, 512
-; RV32IM-NEXT: andi t5, a4, 1024
-; RV32IM-NEXT: lw a2, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and s2, a4, a2
-; RV32IM-NEXT: lui t6, 1
-; RV32IM-NEXT: and s3, a4, t6
+; RV32IM-NEXT: sw a6, 608(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: or a4, a4, a5
+; RV32IM-NEXT: sw a4, 612(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw t0, 624(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a4, a0, t0
+; RV32IM-NEXT: mul a4, a1, a4
+; RV32IM-NEXT: lui t2, 1
+; RV32IM-NEXT: and a5, a0, t2
+; RV32IM-NEXT: mul a5, a1, a5
+; RV32IM-NEXT: xor s0, a4, a5
+; RV32IM-NEXT: lui t1, 64
+; RV32IM-NEXT: and a4, a0, t1
+; RV32IM-NEXT: mul a4, a1, a4
+; RV32IM-NEXT: lui t3, 128
+; RV32IM-NEXT: and a5, a0, t3
+; RV32IM-NEXT: mul a5, a1, a5
+; RV32IM-NEXT: xor a5, a4, a5
+; RV32IM-NEXT: lui t4, 4096
+; RV32IM-NEXT: and a4, a0, t4
+; RV32IM-NEXT: mul a4, a1, a4
+; RV32IM-NEXT: lui t5, 8192
+; RV32IM-NEXT: and a7, a0, t5
+; RV32IM-NEXT: mul a7, a1, a7
+; RV32IM-NEXT: xor a4, a4, a7
+; RV32IM-NEXT: and a7, a3, t0
+; RV32IM-NEXT: and t0, a3, t2
+; RV32IM-NEXT: mul a7, a2, a7
+; RV32IM-NEXT: mul t0, a2, t0
+; RV32IM-NEXT: xor a6, a7, t0
+; RV32IM-NEXT: sw a6, 624(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, a3, t1
+; RV32IM-NEXT: and t0, a3, t3
+; RV32IM-NEXT: mul a7, a2, a7
+; RV32IM-NEXT: mul t0, a2, t0
+; RV32IM-NEXT: xor a6, a7, t0
+; RV32IM-NEXT: sw a6, 600(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and a7, a3, t4
+; RV32IM-NEXT: and t0, a3, t5
+; RV32IM-NEXT: mul a7, a2, a7
+; RV32IM-NEXT: mul t0, a2, t0
+; RV32IM-NEXT: xor a6, a7, t0
+; RV32IM-NEXT: sw a6, 588(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, a0, 2
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: slli t0, a1, 1
+; RV32IM-NEXT: and a6, a7, t0
+; RV32IM-NEXT: sw a6, 604(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, a0, 4
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: slli t0, a1, 2
+; RV32IM-NEXT: and a6, a7, t0
+; RV32IM-NEXT: sw a6, 596(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, a0, 8
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: slli t0, a1, 3
+; RV32IM-NEXT: and a6, a7, t0
+; RV32IM-NEXT: sw a6, 584(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, a0, 16
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: slli t0, a1, 4
+; RV32IM-NEXT: and a6, a7, t0
+; RV32IM-NEXT: sw a6, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, a0, 32
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: slli t0, a1, 5
+; RV32IM-NEXT: and a6, a7, t0
+; RV32IM-NEXT: sw a6, 572(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, a0, 64
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: slli t0, a1, 6
+; RV32IM-NEXT: and a6, a7, t0
+; RV32IM-NEXT: sw a6, 580(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, a0, 128
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: slli t0, a1, 7
+; RV32IM-NEXT: and a6, a7, t0
+; RV32IM-NEXT: sw a6, 552(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, a0, 256
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: slli t0, a1, 8
+; RV32IM-NEXT: and a6, a7, t0
+; RV32IM-NEXT: sw a6, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, a0, 512
+; RV32IM-NEXT: seqz a7, a7
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: slli t0, a1, 9
+; RV32IM-NEXT: and a6, a7, t0
+; RV32IM-NEXT: sw a6, 556(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a7, a0, 1
+; RV32IM-NEXT: seqz a7, a7
; RV32IM-NEXT: lui t6, 2
-; RV32IM-NEXT: and s4, a4, t6
-; RV32IM-NEXT: lui t6, 4
-; RV32IM-NEXT: and s7, a4, t6
-; RV32IM-NEXT: lui t6, 8
-; RV32IM-NEXT: and s8, a4, t6
-; RV32IM-NEXT: lui t6, 16
-; RV32IM-NEXT: and s10, a4, t6
-; RV32IM-NEXT: lui t6, 32
-; RV32IM-NEXT: and t6, a4, t6
-; RV32IM-NEXT: lui s0, 64
-; RV32IM-NEXT: and s0, a4, s0
-; RV32IM-NEXT: lui s1, 128
-; RV32IM-NEXT: and s1, a4, s1
-; RV32IM-NEXT: lui s5, 256
-; RV32IM-NEXT: and s5, a4, s5
-; RV32IM-NEXT: lui a1, 512
-; RV32IM-NEXT: and s6, a4, a1
-; RV32IM-NEXT: lui s9, 1024
-; RV32IM-NEXT: and s9, a4, s9
-; RV32IM-NEXT: lui ra, 2048
-; RV32IM-NEXT: and ra, a4, ra
-; RV32IM-NEXT: lui a1, 4096
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 8192
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 16384
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 32768
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 644(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 65536
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 131072
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 262144
-; RV32IM-NEXT: and a1, a4, a1
-; RV32IM-NEXT: sw a1, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a1, 524288
-; RV32IM-NEXT: and a4, a4, a1
-; RV32IM-NEXT: lw a1, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 628(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 624(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a3
-; RV32IM-NEXT: sw a1, 620(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a5
-; RV32IM-NEXT: sw a1, 616(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, a7
-; RV32IM-NEXT: sw a1, 612(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t0
-; RV32IM-NEXT: sw a1, 608(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, t1
-; RV32IM-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, t2
-; RV32IM-NEXT: sw a3, 696(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t3
-; RV32IM-NEXT: sw a1, 604(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, t4
-; RV32IM-NEXT: sw a1, 600(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, t5
-; RV32IM-NEXT: sw a3, 648(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s2
-; RV32IM-NEXT: sw a3, 672(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s3
-; RV32IM-NEXT: sw a3, 688(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, s4
-; RV32IM-NEXT: sw a1, 596(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, s7
-; RV32IM-NEXT: sw a1, 592(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s8
-; RV32IM-NEXT: sw a3, 640(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s10
-; RV32IM-NEXT: sw a3, 664(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, t6
-; RV32IM-NEXT: sw a3, 684(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s0
-; RV32IM-NEXT: sw a3, 712(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, s1
-; RV32IM-NEXT: sw a1, 584(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a1, a0, s5
-; RV32IM-NEXT: sw a1, 580(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s6
-; RV32IM-NEXT: sw a3, 636(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, s9
-; RV32IM-NEXT: sw a3, 652(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a3, a0, ra
-; RV32IM-NEXT: sw a3, 668(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 572(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a1, a0, a1
-; RV32IM-NEXT: sw a1, 568(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a1
-; RV32IM-NEXT: sw a3, 632(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 644(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a1
-; RV32IM-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a1
-; RV32IM-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a1
-; RV32IM-NEXT: sw a3, 692(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a3, a0, a1
-; RV32IM-NEXT: sw a3, 700(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a0, a4
-; RV32IM-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: and s4, s11, a2
-; RV32IM-NEXT: lui a0, 1
-; RV32IM-NEXT: and t0, s11, a0
-; RV32IM-NEXT: lui a0, 2
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 728(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 4
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 8
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 16
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 32
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 64
-; RV32IM-NEXT: and s0, s11, a0
-; RV32IM-NEXT: lui a0, 128
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 256
-; RV32IM-NEXT: and s5, s11, a0
-; RV32IM-NEXT: lui a0, 512
-; RV32IM-NEXT: and s6, s11, a0
-; RV32IM-NEXT: lui a0, 1024
-; RV32IM-NEXT: and s7, s11, a0
-; RV32IM-NEXT: lui a0, 2048
-; RV32IM-NEXT: and s8, s11, a0
-; RV32IM-NEXT: lui a0, 4096
-; RV32IM-NEXT: and s9, s11, a0
-; RV32IM-NEXT: lui a0, 8192
-; RV32IM-NEXT: and s10, s11, a0
-; RV32IM-NEXT: lui a0, 16384
-; RV32IM-NEXT: and ra, s11, a0
-; RV32IM-NEXT: lui a0, 32768
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 65536
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 131072
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 262144
-; RV32IM-NEXT: and a0, s11, a0
+; RV32IM-NEXT: and t0, a0, t6
+; RV32IM-NEXT: mul t0, a1, t0
+; RV32IM-NEXT: lui t1, 4
+; RV32IM-NEXT: and t1, a0, t1
+; RV32IM-NEXT: mul a6, a1, t1
+; RV32IM-NEXT: sw a6, 528(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t1, 8
+; RV32IM-NEXT: and t1, a0, t1
+; RV32IM-NEXT: mul a6, a1, t1
+; RV32IM-NEXT: sw a6, 548(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui a6, 16
+; RV32IM-NEXT: and t1, a0, a6
+; RV32IM-NEXT: mul a6, a1, t1
+; RV32IM-NEXT: sw a6, 560(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s11, 32
+; RV32IM-NEXT: and t1, a0, s11
+; RV32IM-NEXT: mul a6, a1, t1
+; RV32IM-NEXT: sw a6, 592(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s6, 256
+; RV32IM-NEXT: and t1, a0, s6
+; RV32IM-NEXT: mul t1, a1, t1
+; RV32IM-NEXT: lui s9, 512
+; RV32IM-NEXT: and t2, a0, s9
+; RV32IM-NEXT: mul a6, a1, t2
+; RV32IM-NEXT: sw a6, 520(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui ra, 1024
+; RV32IM-NEXT: and t2, a0, ra
+; RV32IM-NEXT: mul a6, a1, t2
+; RV32IM-NEXT: sw a6, 532(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s8, 2048
+; RV32IM-NEXT: and t2, a0, s8
+; RV32IM-NEXT: mul a6, a1, t2
+; RV32IM-NEXT: sw a6, 544(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s7, 16384
+; RV32IM-NEXT: and t2, a0, s7
+; RV32IM-NEXT: mul t2, a1, t2
+; RV32IM-NEXT: lui a6, 32768
+; RV32IM-NEXT: and t3, a0, a6
+; RV32IM-NEXT: mul t3, a1, t3
+; RV32IM-NEXT: sw t3, 512(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s5, 65536
+; RV32IM-NEXT: and t3, a0, s5
+; RV32IM-NEXT: mul t3, a1, t3
+; RV32IM-NEXT: sw t3, 524(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui s2, 131072
+; RV32IM-NEXT: and t3, a0, s2
+; RV32IM-NEXT: mul t3, a1, t3
+; RV32IM-NEXT: sw t3, 536(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t4, 262144
+; RV32IM-NEXT: and t3, a0, t4
+; RV32IM-NEXT: mul t3, a1, t3
+; RV32IM-NEXT: sw t3, 564(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lui t5, 524288
+; RV32IM-NEXT: and t3, a0, t5
+; RV32IM-NEXT: mul t3, a1, t3
+; RV32IM-NEXT: sw t3, 568(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi a7, a7, -1
+; RV32IM-NEXT: and a7, a7, a1
+; RV32IM-NEXT: sw a7, 508(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: slli a1, a1, 10
+; RV32IM-NEXT: andi a0, a0, 1024
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: and a0, a0, a1
; RV32IM-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lui a0, 524288
-; RV32IM-NEXT: and a0, s11, a0
-; RV32IM-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: andi t4, s11, 2
-; RV32IM-NEXT: andi t5, s11, 1
-; RV32IM-NEXT: andi s2, s11, 4
-; RV32IM-NEXT: andi a0, s11, 8
-; RV32IM-NEXT: andi a1, s11, 16
-; RV32IM-NEXT: andi a2, s11, 32
-; RV32IM-NEXT: andi a3, s11, 64
-; RV32IM-NEXT: andi a4, s11, 128
-; RV32IM-NEXT: andi a5, s11, 256
-; RV32IM-NEXT: andi a7, s11, 512
-; RV32IM-NEXT: andi s11, s11, 1024
-; RV32IM-NEXT: mul s3, a6, t4
-; RV32IM-NEXT: mul s1, a6, t5
-; RV32IM-NEXT: mul t6, a6, s2
-; RV32IM-NEXT: mul t4, a6, a0
-; RV32IM-NEXT: mul t5, a6, a1
-; RV32IM-NEXT: mul t3, a6, a2
-; RV32IM-NEXT: mul a0, a6, a3
-; RV32IM-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, a4
-; RV32IM-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul t2, a6, a5
-; RV32IM-NEXT: mul t1, a6, a7
-; RV32IM-NEXT: mul s11, a6, s11
-; RV32IM-NEXT: mul a0, a6, s4
-; RV32IM-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, t0
-; RV32IM-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul t0, a6, a0
+; RV32IM-NEXT: xor a0, s0, t0
+; RV32IM-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a5, t1
+; RV32IM-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: xor a0, a4, t2
+; RV32IM-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 2
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 1
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 4
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 2
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 8
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli s1, a2, 3
+; RV32IM-NEXT: and a0, a0, s1
+; RV32IM-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 16
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 4
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 32
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 5
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 64
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli s0, a2, 6
+; RV32IM-NEXT: and a0, a0, s0
+; RV32IM-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 128
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 7
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 256
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 8
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: andi a0, a3, 512
+; RV32IM-NEXT: seqz a0, a0
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: slli a1, a2, 9
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: and t6, a3, t6
+; RV32IM-NEXT: lui a1, 4
+; RV32IM-NEXT: and a1, a3, a1
+; RV32IM-NEXT: lui a4, 8
+; RV32IM-NEXT: and a4, a3, a4
+; RV32IM-NEXT: lui a5, 16
+; RV32IM-NEXT: and a5, a3, a5
+; RV32IM-NEXT: and t0, a3, s11
+; RV32IM-NEXT: and t1, a3, s6
+; RV32IM-NEXT: and t2, a3, s9
+; RV32IM-NEXT: and s6, a3, ra
+; RV32IM-NEXT: and s8, a3, s8
+; RV32IM-NEXT: and s9, a3, s7
+; RV32IM-NEXT: and a6, a3, a6
+; RV32IM-NEXT: and a7, a3, s5
+; RV32IM-NEXT: and t3, a3, s2
+; RV32IM-NEXT: and t4, a3, t4
+; RV32IM-NEXT: and t5, a3, t5
+; RV32IM-NEXT: andi s2, a3, 1
+; RV32IM-NEXT: seqz a0, s2
+; RV32IM-NEXT: mul t6, a2, t6
+; RV32IM-NEXT: mul s1, a2, a1
+; RV32IM-NEXT: mul s5, a2, a4
+; RV32IM-NEXT: mul a1, a2, a5
+; RV32IM-NEXT: sw a1, 444(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, a2, t0
+; RV32IM-NEXT: sw a1, 468(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul t1, a2, t1
+; RV32IM-NEXT: mul s0, a2, t2
+; RV32IM-NEXT: mul s6, a2, s6
+; RV32IM-NEXT: mul s11, a2, s8
+; RV32IM-NEXT: mul s8, a2, s9
+; RV32IM-NEXT: mul s9, a2, a6
+; RV32IM-NEXT: mul s2, a2, a7
+; RV32IM-NEXT: mul s7, a2, t3
+; RV32IM-NEXT: mul a1, a2, t4
+; RV32IM-NEXT: sw a1, 456(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: mul a1, a2, t5
+; RV32IM-NEXT: sw a1, 460(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: addi a0, a0, -1
+; RV32IM-NEXT: and t0, a0, a2
+; RV32IM-NEXT: slli a2, a2, 10
+; RV32IM-NEXT: andi a3, a3, 1024
+; RV32IM-NEXT: seqz a3, a3
+; RV32IM-NEXT: addi a3, a3, -1
+; RV32IM-NEXT: and t4, a3, a2
+; RV32IM-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t2, a0, t6
+; RV32IM-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t3, a0, t1
; RV32IM-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a7, a6, a0
+; RV32IM-NEXT: xor t5, a0, s8
+; RV32IM-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli t6, a0, 4
+; RV32IM-NEXT: lw a1, 648(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and s8, a0, a1
+; RV32IM-NEXT: and t6, t6, a1
+; RV32IM-NEXT: slli s8, s8, 4
+; RV32IM-NEXT: or t6, t6, s8
+; RV32IM-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: srli s8, a0, 4
+; RV32IM-NEXT: and a0, a0, a1
+; RV32IM-NEXT: and s8, s8, a1
+; RV32IM-NEXT: slli a0, a0, 4
+; RV32IM-NEXT: or t1, s8, a0
+; RV32IM-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s8, a1, a0
+; RV32IM-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a6, a0, a1
; RV32IM-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s4, a6, a0
-; RV32IM-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a0, a1
; RV32IM-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s0
-; RV32IM-NEXT: sw a0, 728(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s0, a6, a0
-; RV32IM-NEXT: mul s5, a6, s5
-; RV32IM-NEXT: mul s6, a6, s6
-; RV32IM-NEXT: mul a0, a6, s7
-; RV32IM-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul a0, a6, s8
-; RV32IM-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: mul s9, a6, s9
-; RV32IM-NEXT: mul s10, a6, s10
-; RV32IM-NEXT: mul s2, a6, ra
+; RV32IM-NEXT: lw a2, 540(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a0, a2
; RV32IM-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s7, a6, a0
-; RV32IM-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul s8, a6, a0
+; RV32IM-NEXT: lw a3, 504(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a0
; RV32IM-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a4, 500(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a0
; RV32IM-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: mul a0, a6, a0
-; RV32IM-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 624(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a1, a0
-; RV32IM-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a1, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a5, 496(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a0
+; RV32IM-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, t0, a0
+; RV32IM-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw a1, 484(sp) # 4-byte Folded Reload
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: lw a1, 612(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a2, 608(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 604(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a3, 600(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a3
-; RV32IM-NEXT: lw a3, 596(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a4, 592(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a4
-; RV32IM-NEXT: lw a4, 584(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw a5, 580(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a5
-; RV32IM-NEXT: lw a5, 572(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw ra, 568(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, ra
-; RV32IM-NEXT: xor s1, s1, s3
-; RV32IM-NEXT: xor t4, t6, t4
-; RV32IM-NEXT: xor t3, t5, t3
-; RV32IM-NEXT: xor t1, t2, t1
-; RV32IM-NEXT: xor a7, t0, a7
-; RV32IM-NEXT: xor t0, s0, s5
-; RV32IM-NEXT: xor t2, s9, s10
-; RV32IM-NEXT: lw t6, 704(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli t5, t6, 4
-; RV32IM-NEXT: lw s3, 732(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and t6, t6, s3
-; RV32IM-NEXT: and t5, t5, s3
-; RV32IM-NEXT: slli t6, t6, 4
-; RV32IM-NEXT: or t5, t5, t6
-; RV32IM-NEXT: lw s0, 716(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: srli t6, s0, 4
-; RV32IM-NEXT: and s0, s0, s3
-; RV32IM-NEXT: and t6, t6, s3
-; RV32IM-NEXT: slli s0, s0, 4
-; RV32IM-NEXT: or t6, t6, s0
-; RV32IM-NEXT: xor a0, a6, a0
-; RV32IM-NEXT: lw a6, 656(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a6
-; RV32IM-NEXT: lw a6, 648(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, a6
-; RV32IM-NEXT: lw a6, 640(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, a6
-; RV32IM-NEXT: lw a6, 636(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, a6
-; RV32IM-NEXT: lw a6, 632(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, a6
-; RV32IM-NEXT: xor a6, s1, t4
-; RV32IM-NEXT: lw t4, 536(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t3, t3, t4
-; RV32IM-NEXT: xor t1, t1, s11
-; RV32IM-NEXT: xor a7, a7, s4
-; RV32IM-NEXT: xor t0, t0, s6
-; RV32IM-NEXT: xor t2, t2, s2
+; RV32IM-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 472(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, s10
+; RV32IM-NEXT: lw s10, 452(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw ra, 448(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor s10, s10, ra
+; RV32IM-NEXT: xor t2, t2, s1
+; RV32IM-NEXT: xor t3, t3, s0
+; RV32IM-NEXT: xor t5, t5, s9
+; RV32IM-NEXT: xor a6, s8, a6
+; RV32IM-NEXT: lw s0, 580(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a7, s0
+; RV32IM-NEXT: lw s0, 556(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, s0
+; RV32IM-NEXT: lw s0, 548(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, s0
+; RV32IM-NEXT: lw s0, 532(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, s0
+; RV32IM-NEXT: lw s0, 524(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, s0
+; RV32IM-NEXT: xor a0, t0, a0
+; RV32IM-NEXT: lw t0, 480(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, a1, t0
+; RV32IM-NEXT: lw t0, 464(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor t0, s10, t0
+; RV32IM-NEXT: xor t2, t2, s5
+; RV32IM-NEXT: xor t3, t3, s6
+; RV32IM-NEXT: xor t5, t5, s2
+; RV32IM-NEXT: xor a6, a6, a7
+; RV32IM-NEXT: lw a7, 516(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a7
+; RV32IM-NEXT: lw a7, 560(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a7
+; RV32IM-NEXT: lw a7, 544(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a4, a4, a7
+; RV32IM-NEXT: lw a7, 536(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a7
; RV32IM-NEXT: xor a0, a0, a1
-; RV32IM-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a2, a1
-; RV32IM-NEXT: lw a2, 664(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a3, a2
-; RV32IM-NEXT: lw a3, 652(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a4, a3
-; RV32IM-NEXT: lw a4, 644(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a5, a4
-; RV32IM-NEXT: xor a5, a6, t3
-; RV32IM-NEXT: lw a6, 548(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, t1, a6
-; RV32IM-NEXT: lw t1, 540(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, t1
-; RV32IM-NEXT: lw t1, 532(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, t1
-; RV32IM-NEXT: xor t1, t2, s7
-; RV32IM-NEXT: lw t2, 696(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, t2
-; RV32IM-NEXT: lw t2, 688(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, t2
-; RV32IM-NEXT: lw t2, 684(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t2
-; RV32IM-NEXT: lw t2, 668(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a3, a3, t2
-; RV32IM-NEXT: lw t2, 660(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, t2
-; RV32IM-NEXT: lw t2, 564(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a5, a5, t2
-; RV32IM-NEXT: lw t2, 556(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, t2
-; RV32IM-NEXT: lw t2, 552(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, t2
-; RV32IM-NEXT: lw t2, 544(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t0, t0, t2
-; RV32IM-NEXT: xor t1, t1, s8
-; RV32IM-NEXT: srli t2, t5, 2
-; RV32IM-NEXT: lw s0, 736(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and t3, t5, s0
-; RV32IM-NEXT: and t2, t2, s0
-; RV32IM-NEXT: slli t3, t3, 2
-; RV32IM-NEXT: or t2, t2, t3
+; RV32IM-NEXT: xor a1, t0, t4
+; RV32IM-NEXT: lw a7, 444(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, t2, a7
+; RV32IM-NEXT: xor t0, t3, s11
+; RV32IM-NEXT: xor t2, t5, s7
; RV32IM-NEXT: srli t3, t6, 2
-; RV32IM-NEXT: and t4, t6, s0
-; RV32IM-NEXT: and t3, t3, s0
+; RV32IM-NEXT: and t4, t6, s3
+; RV32IM-NEXT: and t3, t3, s3
; RV32IM-NEXT: slli t4, t4, 2
; RV32IM-NEXT: or t3, t3, t4
-; RV32IM-NEXT: lw t4, 712(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a2, t4
-; RV32IM-NEXT: lw t4, 692(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a4, a4, t4
-; RV32IM-NEXT: lw t4, 728(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a7, t4
-; RV32IM-NEXT: lw t4, 560(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor t1, t1, t4
-; RV32IM-NEXT: xor a1, a0, a1
-; RV32IM-NEXT: xor a1, a1, a2
-; RV32IM-NEXT: lw a2, 700(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a2, a4, a2
-; RV32IM-NEXT: xor a4, a5, a6
-; RV32IM-NEXT: xor a4, a4, a7
-; RV32IM-NEXT: lw a6, 576(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, t1, a6
-; RV32IM-NEXT: xor a1, a1, a3
-; RV32IM-NEXT: lw a3, 708(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a7, a2, a3
-; RV32IM-NEXT: xor a4, a4, t0
-; RV32IM-NEXT: slli a0, a0, 24
-; RV32IM-NEXT: lw a2, 588(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a6, a6, a2
-; RV32IM-NEXT: srli a2, t2, 1
-; RV32IM-NEXT: lw t1, 740(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and a3, t2, t1
-; RV32IM-NEXT: and a2, a2, t1
-; RV32IM-NEXT: slli a3, a3, 1
-; RV32IM-NEXT: or a2, a2, a3
-; RV32IM-NEXT: srli a3, t3, 1
-; RV32IM-NEXT: and t0, t3, t1
-; RV32IM-NEXT: and a3, a3, t1
-; RV32IM-NEXT: mv t2, t1
+; RV32IM-NEXT: srli t4, t1, 2
+; RV32IM-NEXT: and t1, t1, s3
+; RV32IM-NEXT: and t4, t4, s3
+; RV32IM-NEXT: slli t1, t1, 2
+; RV32IM-NEXT: or t1, t4, t1
+; RV32IM-NEXT: xor a2, a6, a2
+; RV32IM-NEXT: lw a6, 592(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a6
+; RV32IM-NEXT: lw a6, 564(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a6
+; RV32IM-NEXT: xor a6, a0, a1
+; RV32IM-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a0, a7, a0
+; RV32IM-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a1, t2, a1
+; RV32IM-NEXT: lw a7, 568(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a5, a5, a7
+; RV32IM-NEXT: lw a7, 460(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a7, a1, a7
+; RV32IM-NEXT: xor a3, a2, a3
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: xor a0, a6, a0
+; RV32IM-NEXT: xor a4, a0, t0
+; RV32IM-NEXT: srli a0, t3, 1
+; RV32IM-NEXT: and a1, t3, s4
+; RV32IM-NEXT: and a0, a0, s4
+; RV32IM-NEXT: slli a1, a1, 1
+; RV32IM-NEXT: or a0, a0, a1
+; RV32IM-NEXT: srli a1, t1, 1
+; RV32IM-NEXT: and t0, t1, s4
+; RV32IM-NEXT: slli a2, a2, 24
+; RV32IM-NEXT: and a1, a1, s4
; RV32IM-NEXT: slli t0, t0, 1
-; RV32IM-NEXT: or a3, a3, t0
-; RV32IM-NEXT: lw t1, 744(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: and t0, a1, t1
+; RV32IM-NEXT: or a1, a1, t0
+; RV32IM-NEXT: lw t1, 644(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and t0, a3, t1
; RV32IM-NEXT: slli t0, t0, 8
-; RV32IM-NEXT: or a0, a0, t0
-; RV32IM-NEXT: slli a5, a5, 24
-; RV32IM-NEXT: xor a7, a1, a7
-; RV32IM-NEXT: srli a1, a1, 8
-; RV32IM-NEXT: and a1, a1, t1
-; RV32IM-NEXT: srli a7, a7, 24
-; RV32IM-NEXT: or a1, a1, a7
-; RV32IM-NEXT: and a7, a4, t1
-; RV32IM-NEXT: slli a7, a7, 8
-; RV32IM-NEXT: or a5, a5, a7
-; RV32IM-NEXT: xor a6, a4, a6
+; RV32IM-NEXT: or a2, a2, t0
+; RV32IM-NEXT: slli a6, a6, 24
+; RV32IM-NEXT: xor a5, a3, a5
+; RV32IM-NEXT: srli a3, a3, 8
+; RV32IM-NEXT: and a3, a3, t1
+; RV32IM-NEXT: srli a5, a5, 24
+; RV32IM-NEXT: or a3, a3, a5
+; RV32IM-NEXT: and a5, a4, t1
+; RV32IM-NEXT: slli a5, a5, 8
+; RV32IM-NEXT: or a5, a6, a5
+; RV32IM-NEXT: xor a6, a4, a7
; RV32IM-NEXT: srli a4, a4, 8
; RV32IM-NEXT: and a4, a4, t1
; RV32IM-NEXT: srli a6, a6, 24
; RV32IM-NEXT: or a4, a4, a6
; RV32IM-NEXT: lui a6, 349525
; RV32IM-NEXT: addi a6, a6, 1364
-; RV32IM-NEXT: or a0, a0, a1
+; RV32IM-NEXT: or a2, a2, a3
; RV32IM-NEXT: or a4, a5, a4
-; RV32IM-NEXT: srli a1, a0, 4
-; RV32IM-NEXT: and a0, a0, s3
+; RV32IM-NEXT: srli a3, a2, 4
+; RV32IM-NEXT: lw a7, 648(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: and a2, a2, a7
; RV32IM-NEXT: srli a5, a4, 4
-; RV32IM-NEXT: and a4, a4, s3
-; RV32IM-NEXT: and a1, a1, s3
-; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: and a5, a5, s3
+; RV32IM-NEXT: and a4, a4, a7
+; RV32IM-NEXT: and a3, a3, a7
+; RV32IM-NEXT: slli a2, a2, 4
+; RV32IM-NEXT: and a5, a5, a7
; RV32IM-NEXT: slli a4, a4, 4
-; RV32IM-NEXT: or a0, a1, a0
+; RV32IM-NEXT: or a2, a3, a2
; RV32IM-NEXT: or a4, a5, a4
-; RV32IM-NEXT: srli a1, a0, 2
-; RV32IM-NEXT: and a0, a0, s0
+; RV32IM-NEXT: srli a3, a2, 2
+; RV32IM-NEXT: and a2, a2, s3
; RV32IM-NEXT: srli a5, a4, 2
-; RV32IM-NEXT: and a4, a4, s0
-; RV32IM-NEXT: and a1, a1, s0
-; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: and a5, a5, s0
+; RV32IM-NEXT: and a4, a4, s3
+; RV32IM-NEXT: and a3, a3, s3
+; RV32IM-NEXT: slli a2, a2, 2
+; RV32IM-NEXT: and a5, a5, s3
; RV32IM-NEXT: slli a4, a4, 2
-; RV32IM-NEXT: or a0, a1, a0
+; RV32IM-NEXT: or a2, a3, a2
; RV32IM-NEXT: or a4, a5, a4
-; RV32IM-NEXT: srli a1, a0, 1
+; RV32IM-NEXT: srli a3, a2, 1
; RV32IM-NEXT: srli a5, a4, 1
-; RV32IM-NEXT: and a1, a1, a6
+; RV32IM-NEXT: and a3, a3, a6
; RV32IM-NEXT: and a5, a5, a6
-; RV32IM-NEXT: and a0, a0, t2
-; RV32IM-NEXT: slli a0, a0, 1
-; RV32IM-NEXT: or a0, a1, a0
-; RV32IM-NEXT: and a1, a4, t2
-; RV32IM-NEXT: slli a1, a1, 1
-; RV32IM-NEXT: or a1, a5, a1
-; RV32IM-NEXT: srli a0, a0, 1
-; RV32IM-NEXT: srli a1, a1, 1
-; RV32IM-NEXT: lw a4, 724(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a0, a0, a4
-; RV32IM-NEXT: lw a4, 720(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: xor a1, a1, a4
-; RV32IM-NEXT: srli a4, a0, 8
-; RV32IM-NEXT: srli a5, a0, 24
-; RV32IM-NEXT: slli a6, a0, 24
-; RV32IM-NEXT: and a0, a0, t1
+; RV32IM-NEXT: and a2, a2, s4
+; RV32IM-NEXT: slli a2, a2, 1
+; RV32IM-NEXT: or a2, a3, a2
+; RV32IM-NEXT: and a3, a4, s4
+; RV32IM-NEXT: slli a3, a3, 1
+; RV32IM-NEXT: or a3, a5, a3
+; RV32IM-NEXT: srli a2, a2, 1
+; RV32IM-NEXT: srli a3, a3, 1
+; RV32IM-NEXT: lw a4, 620(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a2, a2, a4
+; RV32IM-NEXT: lw a4, 616(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: xor a3, a3, a4
+; RV32IM-NEXT: srli a4, a2, 8
+; RV32IM-NEXT: srli a5, a2, 24
+; RV32IM-NEXT: slli a6, a2, 24
+; RV32IM-NEXT: and a2, a2, t1
; RV32IM-NEXT: and a4, a4, t1
; RV32IM-NEXT: or a4, a4, a5
-; RV32IM-NEXT: srli a5, a1, 8
-; RV32IM-NEXT: slli a0, a0, 8
-; RV32IM-NEXT: or a0, a6, a0
-; RV32IM-NEXT: srli a6, a1, 24
+; RV32IM-NEXT: srli a5, a3, 8
+; RV32IM-NEXT: slli a2, a2, 8
+; RV32IM-NEXT: or a2, a6, a2
+; RV32IM-NEXT: srli a6, a3, 24
; RV32IM-NEXT: and a5, a5, t1
; RV32IM-NEXT: or a5, a5, a6
-; RV32IM-NEXT: and a6, a1, t1
-; RV32IM-NEXT: slli a1, a1, 24
+; RV32IM-NEXT: and a6, a3, t1
+; RV32IM-NEXT: slli a3, a3, 24
; RV32IM-NEXT: slli a6, a6, 8
-; RV32IM-NEXT: or a1, a1, a6
-; RV32IM-NEXT: or a0, a0, a4
-; RV32IM-NEXT: or a1, a1, a5
-; RV32IM-NEXT: srli a4, a0, 4
-; RV32IM-NEXT: and a0, a0, s3
-; RV32IM-NEXT: srli a5, a1, 4
-; RV32IM-NEXT: and a1, a1, s3
+; RV32IM-NEXT: or a3, a3, a6
+; RV32IM-NEXT: or a2, a2, a4
+; RV32IM-NEXT: or a3, a3, a5
+; RV32IM-NEXT: srli a4, a2, 4
+; RV32IM-NEXT: and a2, a2, a7
+; RV32IM-NEXT: srli a5, a3, 4
+; RV32IM-NEXT: and a3, a3, a7
+; RV32IM-NEXT: and a4, a4, a7
+; RV32IM-NEXT: and a5, a5, a7
+; RV32IM-NEXT: slli a2, a2, 4
+; RV32IM-NEXT: slli a3, a3, 4
+; RV32IM-NEXT: or a2, a4, a2
+; RV32IM-NEXT: or a3, a5, a3
+; RV32IM-NEXT: srli a4, a2, 2
+; RV32IM-NEXT: and a2, a2, s3
+; RV32IM-NEXT: srli a5, a3, 2
+; RV32IM-NEXT: and a3, a3, s3
; RV32IM-NEXT: and a4, a4, s3
; RV32IM-NEXT: and a5, a5, s3
-; RV32IM-NEXT: slli a0, a0, 4
-; RV32IM-NEXT: slli a1, a1, 4
-; RV32IM-NEXT: or a0, a4, a0
-; RV32IM-NEXT: or a1, a5, a1
-; RV32IM-NEXT: srli a4, a0, 2
-; RV32IM-NEXT: and a0, a0, s0
-; RV32IM-NEXT: srli a5, a1, 2
-; RV32IM-NEXT: and a1, a1, s0
-; RV32IM-NEXT: and a4, a4, s0
-; RV32IM-NEXT: and a5, a5, s0
-; RV32IM-NEXT: slli a0, a0, 2
-; RV32IM-NEXT: slli a1, a1, 2
-; RV32IM-NEXT: or a0, a4, a0
-; RV32IM-NEXT: or a1, a5, a1
-; RV32IM-NEXT: srli a4, a0, 1
-; RV32IM-NEXT: and a0, a0, t2
-; RV32IM-NEXT: srli a5, a1, 1
-; RV32IM-NEXT: and a1, a1, t2
-; RV32IM-NEXT: and a4, a4, t2
-; RV32IM-NEXT: and a5, a5, t2
-; RV32IM-NEXT: slli a0, a0, 1
-; RV32IM-NEXT: or a0, a4, a0
-; RV32IM-NEXT: slli a1, a1, 1
-; RV32IM-NEXT: or a1, a5, a1
-; RV32IM-NEXT: lw a4, 676(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: sw a0, 0(a4)
-; RV32IM-NEXT: sw a2, 4(a4)
-; RV32IM-NEXT: sw a1, 8(a4)
-; RV32IM-NEXT: sw a3, 12(a4)
-; RV32IM-NEXT: lw a4, 680(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: sw a0, 0(a4)
-; RV32IM-NEXT: sw a2, 4(a4)
-; RV32IM-NEXT: sw a1, 8(a4)
-; RV32IM-NEXT: sw a3, 12(a4)
-; RV32IM-NEXT: lw ra, 796(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s0, 792(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s1, 788(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s2, 784(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s3, 780(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s4, 776(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s5, 772(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s6, 768(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s7, 764(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s8, 760(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s9, 756(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s10, 752(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw s11, 748(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 800
+; RV32IM-NEXT: slli a2, a2, 2
+; RV32IM-NEXT: slli a3, a3, 2
+; RV32IM-NEXT: or a2, a4, a2
+; RV32IM-NEXT: or a3, a5, a3
+; RV32IM-NEXT: srli a4, a2, 1
+; RV32IM-NEXT: and a2, a2, s4
+; RV32IM-NEXT: srli a5, a3, 1
+; RV32IM-NEXT: and a3, a3, s4
+; RV32IM-NEXT: and a4, a4, s4
+; RV32IM-NEXT: and a5, a5, s4
+; RV32IM-NEXT: slli a2, a2, 1
+; RV32IM-NEXT: or a2, a4, a2
+; RV32IM-NEXT: slli a3, a3, 1
+; RV32IM-NEXT: or a3, a5, a3
+; RV32IM-NEXT: lw a4, 636(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a2, 0(a4)
+; RV32IM-NEXT: sw a0, 4(a4)
+; RV32IM-NEXT: sw a3, 8(a4)
+; RV32IM-NEXT: sw a1, 12(a4)
+; RV32IM-NEXT: lw a4, 640(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: sw a2, 0(a4)
+; RV32IM-NEXT: sw a0, 4(a4)
+; RV32IM-NEXT: sw a3, 8(a4)
+; RV32IM-NEXT: sw a1, 12(a4)
+; RV32IM-NEXT: lw ra, 700(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 696(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s1, 692(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s2, 688(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s3, 684(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s4, 680(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s5, 676(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s6, 672(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s7, 668(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s8, 664(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s9, 660(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s10, 656(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s11, 652(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 704
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: commutative_clmulr_v2i64:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: addi sp, sp, -1200
-; RV64IM-NEXT: sd ra, 1192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s0, 1184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s1, 1176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s2, 1168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s3, 1160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s4, 1152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s5, 1144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s6, 1136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s7, 1128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s8, 1120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s9, 1112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s10, 1104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd s11, 1096(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd a5, 984(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: sd a4, 976(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s3, a0
-; RV64IM-NEXT: srli a4, a2, 24
-; RV64IM-NEXT: lui s9, 4080
-; RV64IM-NEXT: srli a5, a2, 8
+; RV64IM-NEXT: addi sp, sp, -960
+; RV64IM-NEXT: sd ra, 952(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s0, 944(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s1, 936(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s2, 928(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s3, 920(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s4, 912(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s5, 904(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s6, 896(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s7, 888(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s8, 880(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s9, 872(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s10, 864(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd s11, 856(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a5, 824(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a4, 816(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mv t6, a0
+; RV64IM-NEXT: srli t0, a2, 24
+; RV64IM-NEXT: lui t5, 4080
+; RV64IM-NEXT: srli t1, a2, 8
; RV64IM-NEXT: li t4, 255
-; RV64IM-NEXT: srli a7, a2, 40
-; RV64IM-NEXT: lui s10, 16
-; RV64IM-NEXT: srli t0, a2, 56
-; RV64IM-NEXT: srliw t1, a2, 24
-; RV64IM-NEXT: slli t3, a2, 56
-; RV64IM-NEXT: lui t6, 61681
-; RV64IM-NEXT: lui s0, 209715
-; RV64IM-NEXT: lui s4, 349525
-; RV64IM-NEXT: srli t2, a0, 24
-; RV64IM-NEXT: srli s2, a0, 8
-; RV64IM-NEXT: srli a6, a0, 40
-; RV64IM-NEXT: srli t5, a0, 56
-; RV64IM-NEXT: srliw s1, a0, 24
-; RV64IM-NEXT: slli a0, a0, 56
-; RV64IM-NEXT: sd a0, 1088(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: srli s6, a3, 24
-; RV64IM-NEXT: srli s8, a3, 8
-; RV64IM-NEXT: srli s5, a3, 40
-; RV64IM-NEXT: srli s7, a3, 56
-; RV64IM-NEXT: and a4, a4, s9
-; RV64IM-NEXT: slli ra, t4, 24
-; RV64IM-NEXT: and a5, a5, ra
-; RV64IM-NEXT: or t4, a5, a4
-; RV64IM-NEXT: addi a0, s10, -256
-; RV64IM-NEXT: and a4, a7, a0
-; RV64IM-NEXT: or a7, a4, t0
-; RV64IM-NEXT: and a4, a2, s9
-; RV64IM-NEXT: slli t1, t1, 32
-; RV64IM-NEXT: addi t0, t6, -241
-; RV64IM-NEXT: addi t6, s0, 819
-; RV64IM-NEXT: addi s0, s4, 1365
-; RV64IM-NEXT: slli a4, a4, 24
-; RV64IM-NEXT: or a5, a4, t1
-; RV64IM-NEXT: slli a4, t0, 32
-; RV64IM-NEXT: add s11, t0, a4
-; RV64IM-NEXT: slli a4, t6, 32
-; RV64IM-NEXT: add s10, t6, a4
-; RV64IM-NEXT: slli a4, s0, 32
-; RV64IM-NEXT: add s4, s0, a4
-; RV64IM-NEXT: srliw a4, a3, 24
-; RV64IM-NEXT: and t0, t2, s9
-; RV64IM-NEXT: and t1, s2, ra
+; RV64IM-NEXT: srli t2, a2, 40
+; RV64IM-NEXT: lui t3, 16
+; RV64IM-NEXT: srli s0, a2, 56
+; RV64IM-NEXT: srliw a6, a2, 24
+; RV64IM-NEXT: slli a0, a2, 56
+; RV64IM-NEXT: sd a0, 848(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui s1, 61681
+; RV64IM-NEXT: lui s5, 209715
+; RV64IM-NEXT: lui s2, 349525
+; RV64IM-NEXT: srli s3, t6, 24
+; RV64IM-NEXT: srli s4, t6, 8
+; RV64IM-NEXT: srli a4, t6, 40
+; RV64IM-NEXT: srli a5, t6, 56
+; RV64IM-NEXT: srliw a7, t6, 24
+; RV64IM-NEXT: slli a0, t6, 56
+; RV64IM-NEXT: sd a0, 840(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: srli s7, a3, 24
+; RV64IM-NEXT: srli s9, a3, 8
+; RV64IM-NEXT: srli s6, a3, 40
+; RV64IM-NEXT: srli s8, a3, 56
+; RV64IM-NEXT: and t0, t0, t5
+; RV64IM-NEXT: slli t4, t4, 24
+; RV64IM-NEXT: and t1, t1, t4
; RV64IM-NEXT: or t0, t1, t0
-; RV64IM-NEXT: srli t1, a1, 24
-; RV64IM-NEXT: and a6, a6, a0
-; RV64IM-NEXT: or t2, a6, t5
-; RV64IM-NEXT: and a6, s3, s9
-; RV64IM-NEXT: slli s1, s1, 32
-; RV64IM-NEXT: slli a6, a6, 24
-; RV64IM-NEXT: or a6, a6, s1
-; RV64IM-NEXT: srli t5, a1, 8
-; RV64IM-NEXT: and t6, s6, s9
-; RV64IM-NEXT: sd ra, 968(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s0, s8, ra
-; RV64IM-NEXT: or t6, s0, t6
+; RV64IM-NEXT: addi a0, t3, -256
+; RV64IM-NEXT: and t1, t2, a0
+; RV64IM-NEXT: or t1, t1, s0
+; RV64IM-NEXT: and t2, a2, t5
+; RV64IM-NEXT: slli a6, a6, 32
+; RV64IM-NEXT: addi t3, s1, -241
+; RV64IM-NEXT: addi s0, s5, 819
+; RV64IM-NEXT: addi s1, s2, 1365
+; RV64IM-NEXT: slli t2, t2, 24
+; RV64IM-NEXT: or s5, t2, a6
+; RV64IM-NEXT: slli a6, t3, 32
+; RV64IM-NEXT: add s10, t3, a6
+; RV64IM-NEXT: slli a6, s0, 32
+; RV64IM-NEXT: add s11, s0, a6
+; RV64IM-NEXT: slli a6, s1, 32
+; RV64IM-NEXT: add ra, s1, a6
+; RV64IM-NEXT: srliw t2, a3, 24
+; RV64IM-NEXT: and a6, s3, t5
+; RV64IM-NEXT: and t3, s4, t4
+; RV64IM-NEXT: or a6, t3, a6
+; RV64IM-NEXT: srli t3, a1, 24
+; RV64IM-NEXT: and a4, a4, a0
+; RV64IM-NEXT: or a5, a4, a5
+; RV64IM-NEXT: and a4, t6, t5
+; RV64IM-NEXT: slli a7, a7, 32
+; RV64IM-NEXT: slli a4, a4, 24
+; RV64IM-NEXT: or a4, a4, a7
+; RV64IM-NEXT: srli a7, a1, 8
+; RV64IM-NEXT: and s0, s7, t5
+; RV64IM-NEXT: sd t4, 808(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s1, s9, t4
+; RV64IM-NEXT: or s1, s1, s0
; RV64IM-NEXT: srli s0, a1, 40
-; RV64IM-NEXT: and s1, s5, a0
-; RV64IM-NEXT: or s1, s1, s7
-; RV64IM-NEXT: and s2, a3, s9
-; RV64IM-NEXT: slli a4, a4, 32
+; RV64IM-NEXT: and s2, s6, a0
+; RV64IM-NEXT: or s3, s2, s8
+; RV64IM-NEXT: and s2, a3, t5
+; RV64IM-NEXT: slli t2, t2, 32
; RV64IM-NEXT: slli s2, s2, 24
-; RV64IM-NEXT: or s2, s2, a4
-; RV64IM-NEXT: srli a4, a1, 56
-; RV64IM-NEXT: and t1, t1, s9
-; RV64IM-NEXT: and t5, t5, ra
-; RV64IM-NEXT: or t1, t5, t1
-; RV64IM-NEXT: srliw t5, a1, 24
-; RV64IM-NEXT: mv s5, a0
-; RV64IM-NEXT: sd a0, 952(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: or t2, s2, t2
+; RV64IM-NEXT: srli s2, a1, 56
+; RV64IM-NEXT: and t3, t3, t5
+; RV64IM-NEXT: and a7, a7, t4
+; RV64IM-NEXT: or a7, a7, t3
+; RV64IM-NEXT: srliw t3, a1, 24
+; RV64IM-NEXT: mv t4, a0
+; RV64IM-NEXT: sd a0, 784(sp) # 8-byte Folded Spill
; RV64IM-NEXT: and s0, s0, a0
-; RV64IM-NEXT: or s0, s0, a4
-; RV64IM-NEXT: and a4, a1, s9
-; RV64IM-NEXT: slli t5, t5, 32
-; RV64IM-NEXT: slli a4, a4, 24
-; RV64IM-NEXT: or t5, a4, t5
-; RV64IM-NEXT: li a4, 1
-; RV64IM-NEXT: or a7, t4, a7
-; RV64IM-NEXT: slli t4, a3, 56
+; RV64IM-NEXT: or s4, s0, s2
+; RV64IM-NEXT: and s0, a1, t5
+; RV64IM-NEXT: slli t3, t3, 32
+; RV64IM-NEXT: slli s0, s0, 24
+; RV64IM-NEXT: or t3, s0, t3
+; RV64IM-NEXT: li s0, 1
+; RV64IM-NEXT: or s6, t0, t1
+; RV64IM-NEXT: lui s2, 1
; RV64IM-NEXT: and a2, a2, a0
; RV64IM-NEXT: slli a2, a2, 40
-; RV64IM-NEXT: or a2, t3, a2
-; RV64IM-NEXT: slli t3, a1, 56
-; RV64IM-NEXT: or t0, t0, t2
-; RV64IM-NEXT: slli a0, a4, 11
-; RV64IM-NEXT: sd a0, 992(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a0, s3, s5
+; RV64IM-NEXT: ld a0, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: or a2, a0, a2
+; RV64IM-NEXT: lui t0, 64
+; RV64IM-NEXT: or t5, a6, a5
+; RV64IM-NEXT: lui t1, 128
+; RV64IM-NEXT: and a0, t6, t4
; RV64IM-NEXT: slli a0, a0, 40
-; RV64IM-NEXT: ld t2, 1088(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: or a0, t2, a0
-; RV64IM-NEXT: slli t2, a4, 31
-; RV64IM-NEXT: sd t2, 1016(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or t2, t6, s1
-; RV64IM-NEXT: slli t6, a4, 32
-; RV64IM-NEXT: sd t6, 1048(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a3, a3, s5
+; RV64IM-NEXT: ld a5, 840(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: or a0, a5, a0
+; RV64IM-NEXT: lui s7, 4096
+; RV64IM-NEXT: or t6, s1, s3
+; RV64IM-NEXT: slli a6, a3, 56
+; RV64IM-NEXT: and a3, a3, t4
; RV64IM-NEXT: slli a3, a3, 40
-; RV64IM-NEXT: or a3, t4, a3
-; RV64IM-NEXT: slli t4, a4, 33
-; RV64IM-NEXT: sd t4, 1008(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or t1, t1, s0
-; RV64IM-NEXT: slli t4, a4, 34
-; RV64IM-NEXT: sd t4, 1032(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a1, s5
+; RV64IM-NEXT: or s1, a6, a3
+; RV64IM-NEXT: lui a6, 8192
+; RV64IM-NEXT: or a7, a7, s4
+; RV64IM-NEXT: slli a3, a1, 56
+; RV64IM-NEXT: and a1, a1, t4
; RV64IM-NEXT: slli a1, a1, 40
-; RV64IM-NEXT: or a1, t3, a1
-; RV64IM-NEXT: slli t3, a4, 35
-; RV64IM-NEXT: sd t3, 1000(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a2, a2, a5
-; RV64IM-NEXT: slli a5, a4, 36
-; RV64IM-NEXT: sd a5, 1024(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a0, a0, a6
-; RV64IM-NEXT: slli a5, a4, 37
-; RV64IM-NEXT: sd a5, 1040(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a3, a3, s2
-; RV64IM-NEXT: slli a5, a4, 38
-; RV64IM-NEXT: sd a5, 1056(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a1, a1, t5
-; RV64IM-NEXT: slli a5, a4, 39
-; RV64IM-NEXT: sd a5, 1064(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a2, a2, a7
-; RV64IM-NEXT: slli a5, a4, 40
-; RV64IM-NEXT: sd a5, 1080(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a0, a0, t0
-; RV64IM-NEXT: slli a5, a4, 41
-; RV64IM-NEXT: sd a5, 1072(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a3, a3, t2
-; RV64IM-NEXT: slli a5, a4, 42
-; RV64IM-NEXT: sd a5, 1088(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: or a1, a1, t1
-; RV64IM-NEXT: srli a5, a2, 4
-; RV64IM-NEXT: sd s11, 960(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a2, a2, s11
-; RV64IM-NEXT: and a5, a5, s11
-; RV64IM-NEXT: slli a2, a2, 4
-; RV64IM-NEXT: or a2, a5, a2
-; RV64IM-NEXT: srli a5, a0, 4
-; RV64IM-NEXT: and a0, a0, s11
-; RV64IM-NEXT: and a5, a5, s11
+; RV64IM-NEXT: or a1, a3, a1
+; RV64IM-NEXT: slli s8, s0, 11
+; RV64IM-NEXT: or t4, a2, s5
+; RV64IM-NEXT: slli a3, s0, 33
+; RV64IM-NEXT: or a0, a0, a4
+; RV64IM-NEXT: slli a4, s0, 34
+; RV64IM-NEXT: or t2, s1, t2
+; RV64IM-NEXT: slli a2, s0, 40
+; RV64IM-NEXT: or a1, a1, t3
+; RV64IM-NEXT: or t3, t4, s6
+; RV64IM-NEXT: or a0, a0, t5
+; RV64IM-NEXT: or t2, t2, t6
+; RV64IM-NEXT: or a1, a1, a7
+; RV64IM-NEXT: srli a7, t3, 4
+; RV64IM-NEXT: sd s10, 800(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t3, t3, s10
+; RV64IM-NEXT: srli t4, a0, 4
+; RV64IM-NEXT: and a0, a0, s10
+; RV64IM-NEXT: srli t5, t2, 4
+; RV64IM-NEXT: and t2, t2, s10
+; RV64IM-NEXT: srli t6, a1, 4
+; RV64IM-NEXT: and a1, a1, s10
+; RV64IM-NEXT: and a7, a7, s10
+; RV64IM-NEXT: slli t3, t3, 4
+; RV64IM-NEXT: and t4, t4, s10
; RV64IM-NEXT: slli a0, a0, 4
-; RV64IM-NEXT: or a0, a5, a0
-; RV64IM-NEXT: srli a5, a3, 4
-; RV64IM-NEXT: and a3, a3, s11
-; RV64IM-NEXT: and a5, a5, s11
-; RV64IM-NEXT: slli a3, a3, 4
-; RV64IM-NEXT: or a3, a5, a3
-; RV64IM-NEXT: srli a5, a1, 4
-; RV64IM-NEXT: and a1, a1, s11
-; RV64IM-NEXT: and a5, a5, s11
+; RV64IM-NEXT: and t5, t5, s10
+; RV64IM-NEXT: slli t2, t2, 4
+; RV64IM-NEXT: and t6, t6, s10
; RV64IM-NEXT: slli a1, a1, 4
-; RV64IM-NEXT: or a1, a5, a1
-; RV64IM-NEXT: srli a5, a2, 2
-; RV64IM-NEXT: sd s10, 944(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a2, a2, s10
-; RV64IM-NEXT: and a5, a5, s10
-; RV64IM-NEXT: slli a2, a2, 2
-; RV64IM-NEXT: or a2, a5, a2
-; RV64IM-NEXT: srli a5, a0, 2
-; RV64IM-NEXT: and a0, a0, s10
-; RV64IM-NEXT: and a5, a5, s10
+; RV64IM-NEXT: or a7, a7, t3
+; RV64IM-NEXT: or a0, t4, a0
+; RV64IM-NEXT: or t2, t5, t2
+; RV64IM-NEXT: or a1, t6, a1
+; RV64IM-NEXT: srli t3, a7, 2
+; RV64IM-NEXT: sd s11, 792(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, a7, s11
+; RV64IM-NEXT: srli t4, a0, 2
+; RV64IM-NEXT: and a0, a0, s11
+; RV64IM-NEXT: srli t5, t2, 2
+; RV64IM-NEXT: and t2, t2, s11
+; RV64IM-NEXT: srli t6, a1, 2
+; RV64IM-NEXT: and a1, a1, s11
+; RV64IM-NEXT: and t3, t3, s11
+; RV64IM-NEXT: slli a7, a7, 2
+; RV64IM-NEXT: and t4, t4, s11
; RV64IM-NEXT: slli a0, a0, 2
-; RV64IM-NEXT: or a0, a5, a0
-; RV64IM-NEXT: srli a5, a3, 2
-; RV64IM-NEXT: and a3, a3, s10
-; RV64IM-NEXT: and a5, a5, s10
-; RV64IM-NEXT: slli a3, a3, 2
-; RV64IM-NEXT: or a5, a5, a3
-; RV64IM-NEXT: srli a3, a1, 2
-; RV64IM-NEXT: and a1, a1, s10
-; RV64IM-NEXT: and a3, a3, s10
+; RV64IM-NEXT: and t5, t5, s11
+; RV64IM-NEXT: slli t2, t2, 2
+; RV64IM-NEXT: and t6, t6, s11
; RV64IM-NEXT: slli a1, a1, 2
-; RV64IM-NEXT: or a1, a3, a1
-; RV64IM-NEXT: srli a3, a2, 1
-; RV64IM-NEXT: sd s4, 936(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a2, a2, s4
-; RV64IM-NEXT: and a3, a3, s4
-; RV64IM-NEXT: slli a2, a2, 1
-; RV64IM-NEXT: or a3, a3, a2
-; RV64IM-NEXT: srli a2, a0, 1
-; RV64IM-NEXT: and a0, a0, s4
-; RV64IM-NEXT: and a2, a2, s4
+; RV64IM-NEXT: or a7, t3, a7
+; RV64IM-NEXT: or a0, t4, a0
+; RV64IM-NEXT: or t2, t5, t2
+; RV64IM-NEXT: or a1, t6, a1
+; RV64IM-NEXT: srli t3, a7, 1
+; RV64IM-NEXT: sd ra, 776(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, a7, ra
+; RV64IM-NEXT: srli t4, a0, 1
+; RV64IM-NEXT: and a0, a0, ra
+; RV64IM-NEXT: srli t5, t2, 1
+; RV64IM-NEXT: and t2, t2, ra
+; RV64IM-NEXT: srli t6, a1, 1
+; RV64IM-NEXT: and a1, a1, ra
+; RV64IM-NEXT: and t3, t3, ra
+; RV64IM-NEXT: slli a7, a7, 1
+; RV64IM-NEXT: and t4, t4, ra
; RV64IM-NEXT: slli a0, a0, 1
-; RV64IM-NEXT: or a6, a2, a0
-; RV64IM-NEXT: srli a0, a5, 1
-; RV64IM-NEXT: and a2, a5, s4
-; RV64IM-NEXT: and a0, a0, s4
-; RV64IM-NEXT: slli a2, a2, 1
-; RV64IM-NEXT: or a0, a0, a2
-; RV64IM-NEXT: srli a2, a1, 1
-; RV64IM-NEXT: and a1, a1, s4
-; RV64IM-NEXT: and a2, a2, s4
+; RV64IM-NEXT: and s1, t5, ra
+; RV64IM-NEXT: slli t2, t2, 1
+; RV64IM-NEXT: and s3, t6, ra
; RV64IM-NEXT: slli a1, a1, 1
-; RV64IM-NEXT: or s11, a2, a1
-; RV64IM-NEXT: slli s9, a4, 43
-; RV64IM-NEXT: sd s9, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s8, a4, 44
-; RV64IM-NEXT: sd s8, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s7, a4, 45
-; RV64IM-NEXT: sd s7, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s6, a4, 46
-; RV64IM-NEXT: sd s6, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s5, a4, 47
-; RV64IM-NEXT: sd s5, 448(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: slli s4, a4, 48
-; RV64IM-NEXT: slli s3, a4, 49
-; RV64IM-NEXT: slli s2, a4, 50
-; RV64IM-NEXT: slli s1, a4, 51
-; RV64IM-NEXT: slli s0, a4, 52
-; RV64IM-NEXT: slli t6, a4, 53
-; RV64IM-NEXT: slli t5, a4, 54
-; RV64IM-NEXT: slli t4, a4, 55
-; RV64IM-NEXT: slli t3, a4, 56
-; RV64IM-NEXT: slli t2, a4, 57
-; RV64IM-NEXT: slli t1, a4, 58
-; RV64IM-NEXT: slli t0, a4, 59
-; RV64IM-NEXT: slli a7, a4, 60
-; RV64IM-NEXT: slli a5, a4, 61
-; RV64IM-NEXT: slli a4, a4, 62
-; RV64IM-NEXT: li a1, -1
-; RV64IM-NEXT: slli a2, a1, 63
-; RV64IM-NEXT: lui ra, 4
-; RV64IM-NEXT: lui s10, 8
-; RV64IM-NEXT: andi a1, a6, 2
-; RV64IM-NEXT: sd a1, 928(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 1
-; RV64IM-NEXT: sd a1, 920(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 4
-; RV64IM-NEXT: sd a1, 912(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 8
-; RV64IM-NEXT: sd a1, 904(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 16
-; RV64IM-NEXT: sd a1, 896(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 32
-; RV64IM-NEXT: sd a1, 888(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 64
-; RV64IM-NEXT: sd a1, 880(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 128
-; RV64IM-NEXT: sd a1, 872(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 256
-; RV64IM-NEXT: sd a1, 864(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 512
-; RV64IM-NEXT: sd a1, 856(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, a6, 1024
-; RV64IM-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 992(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 840(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 1
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 832(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 2
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 824(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, ra
-; RV64IM-NEXT: sd a1, 816(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s10
-; RV64IM-NEXT: sd a1, 808(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 16
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 800(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 32
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 792(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 64
-; RV64IM-NEXT: and s10, a6, a1
-; RV64IM-NEXT: lui a1, 128
-; RV64IM-NEXT: and ra, a6, a1
-; RV64IM-NEXT: lui a1, 256
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 784(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 512
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 768(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 1024
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 760(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 2048
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 736(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 4096
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 728(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 8192
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 720(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 16384
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 704(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 32768
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 688(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 65536
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 672(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 131072
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 656(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 262144
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 600(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1016(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 592(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1048(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 584(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1008(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 568(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1032(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 528(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1000(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 504(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1024(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 496(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1040(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 488(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1056(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 480(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1064(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 472(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1080(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 464(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1072(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 456(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1088(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a6, a1
-; RV64IM-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s9
-; RV64IM-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s8
-; RV64IM-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s7
-; RV64IM-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s6
-; RV64IM-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s5
-; RV64IM-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, s4
+; RV64IM-NEXT: or t5, t3, a7
+; RV64IM-NEXT: or t6, t4, a0
+; RV64IM-NEXT: or a0, s1, t2
+; RV64IM-NEXT: or a5, s3, a1
+; RV64IM-NEXT: and a7, t6, s8
+; RV64IM-NEXT: and t2, t6, s2
+; RV64IM-NEXT: and t3, t6, t0
+; RV64IM-NEXT: and t4, t6, t1
+; RV64IM-NEXT: and s1, t6, s7
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: mul t2, t5, t2
+; RV64IM-NEXT: xor a1, a7, t2
; RV64IM-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s9, s4
-; RV64IM-NEXT: and a1, a6, s3
+; RV64IM-NEXT: and a7, t6, a6
+; RV64IM-NEXT: mul t2, t5, t3
+; RV64IM-NEXT: mul t3, t5, t4
+; RV64IM-NEXT: xor a1, t2, t3
; RV64IM-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s8, s3
-; RV64IM-NEXT: and a1, a6, s2
+; RV64IM-NEXT: and t2, t6, a3
+; RV64IM-NEXT: mul t3, t5, s1
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: xor a1, t3, a7
+; RV64IM-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, t6, a4
+; RV64IM-NEXT: mul t2, t5, t2
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: xor a1, t2, a7
; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s7, s2
-; RV64IM-NEXT: and a1, a6, s1
+; RV64IM-NEXT: and a7, t6, a2
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: slli s3, s0, 41
+; RV64IM-NEXT: and t2, t6, s3
+; RV64IM-NEXT: mul t2, t5, t2
+; RV64IM-NEXT: xor a1, a7, t2
; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s6, s1
-; RV64IM-NEXT: and a1, a6, s0
-; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s5, s0
-; RV64IM-NEXT: and a1, a6, t6
-; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s4, t6
-; RV64IM-NEXT: and a1, a6, t5
-; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s3, t5
-; RV64IM-NEXT: and a1, a6, t4
-; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s2, t4
-; RV64IM-NEXT: and a1, a6, t3
-; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv s1, t3
-; RV64IM-NEXT: and a1, a6, t2
-; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv t3, t2
-; RV64IM-NEXT: and a1, a6, t1
-; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv t2, t1
-; RV64IM-NEXT: and a1, a6, t0
-; RV64IM-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv t1, t0
-; RV64IM-NEXT: and a1, a6, a7
-; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mv t0, a7
-; RV64IM-NEXT: and a1, a6, a5
-; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, a6, a4
-; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a6, a6, a2
-; RV64IM-NEXT: ld a1, 928(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 640(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 920(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 632(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 912(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 624(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 904(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 616(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 896(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 608(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 888(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t4, a3, a1
-; RV64IM-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 712(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 872(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a3, a1
-; RV64IM-NEXT: sd a7, 880(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 864(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t5, a3, a1
-; RV64IM-NEXT: ld a1, 856(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul t6, a3, a1
-; RV64IM-NEXT: ld a1, 848(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 696(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a3, a1
-; RV64IM-NEXT: sd a7, 776(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 832(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a3, a1
-; RV64IM-NEXT: sd a7, 856(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 824(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s0, a3, a1
-; RV64IM-NEXT: ld a1, 816(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 576(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 808(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 680(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 800(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a3, a1
-; RV64IM-NEXT: sd a7, 752(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 792(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a7, a3, a1
-; RV64IM-NEXT: sd a7, 832(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a7, a3, s10
-; RV64IM-NEXT: sd a7, 904(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a3, ra
-; RV64IM-NEXT: sd a1, 560(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 784(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 552(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 768(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 664(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
+; RV64IM-NEXT: slli s1, s0, 48
+; RV64IM-NEXT: and a7, t6, s1
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: slli s4, s0, 49
+; RV64IM-NEXT: and t2, t6, s4
+; RV64IM-NEXT: mul t2, t5, t2
+; RV64IM-NEXT: xor a1, a7, t2
; RV64IM-NEXT: sd a1, 744(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 736(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 808(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 728(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 544(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 720(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 536(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 704(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 648(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 688(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 720(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 672(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 800(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 656(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 864(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 600(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 912(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 592(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 928(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 584(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 520(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 568(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 512(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 528(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 592(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 504(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 688(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 496(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
+; RV64IM-NEXT: slli a7, s0, 56
+; RV64IM-NEXT: and t2, t6, a7
+; RV64IM-NEXT: mul t2, t5, t2
+; RV64IM-NEXT: slli t4, s0, 57
+; RV64IM-NEXT: and t3, t6, t4
+; RV64IM-NEXT: mul t3, t5, t3
+; RV64IM-NEXT: xor a1, t2, t3
+; RV64IM-NEXT: sd a1, 728(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t2, a5, s8
+; RV64IM-NEXT: and t3, a5, s2
+; RV64IM-NEXT: mul t2, a0, t2
+; RV64IM-NEXT: mul t3, a0, t3
+; RV64IM-NEXT: xor a1, t2, t3
; RV64IM-NEXT: sd a1, 768(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 488(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 824(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 480(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 888(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 920(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 464(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 496(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 488(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 568(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 656(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 736(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 792(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 896(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 480(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 472(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 528(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 600(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 704(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
+; RV64IM-NEXT: and t0, a5, t0
+; RV64IM-NEXT: and t1, a5, t1
+; RV64IM-NEXT: mul t0, a0, t0
+; RV64IM-NEXT: mul t1, a0, t1
+; RV64IM-NEXT: xor a1, t0, t1
; RV64IM-NEXT: sd a1, 760(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a1, a5, s7
+; RV64IM-NEXT: and a6, a5, a6
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: xor a1, a1, a6
+; RV64IM-NEXT: sd a1, 752(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a3, a5, a3
+; RV64IM-NEXT: and a4, a5, a4
+; RV64IM-NEXT: mul a3, a0, a3
+; RV64IM-NEXT: mul a4, a0, a4
+; RV64IM-NEXT: xor a3, a3, a4
+; RV64IM-NEXT: sd a3, 736(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a5, a2
+; RV64IM-NEXT: and a3, a5, s3
+; RV64IM-NEXT: mul a2, a0, a2
+; RV64IM-NEXT: mul a3, a0, a3
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: sd a2, 720(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s1, a5, s1
+; RV64IM-NEXT: and a2, a5, s4
+; RV64IM-NEXT: mul a3, a0, s1
+; RV64IM-NEXT: mul a2, a0, a2
+; RV64IM-NEXT: xor a2, a3, a2
+; RV64IM-NEXT: sd a2, 712(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a2, a5, a7
+; RV64IM-NEXT: and a3, a5, t4
+; RV64IM-NEXT: mul a2, a0, a2
+; RV64IM-NEXT: mul a3, a0, a3
+; RV64IM-NEXT: xor a2, a2, a3
+; RV64IM-NEXT: sd a2, 704(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 2
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 1
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 696(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 4
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 2
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 680(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 8
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 3
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 672(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 16
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 4
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 664(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 32
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 5
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 648(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 64
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 6
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 688(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 128
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 7
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 632(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 256
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 8
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 616(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 512
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, t5, 9
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 656(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t3, s0, 31
+; RV64IM-NEXT: sd t3, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s10, s0, 32
+; RV64IM-NEXT: sd s10, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a7, s0, 35
+; RV64IM-NEXT: sd a7, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s3, s0, 36
+; RV64IM-NEXT: slli s4, s0, 37
+; RV64IM-NEXT: slli s5, s0, 38
+; RV64IM-NEXT: slli s6, s0, 39
+; RV64IM-NEXT: slli s7, s0, 42
+; RV64IM-NEXT: slli s8, s0, 43
+; RV64IM-NEXT: slli a1, s0, 44
+; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a3, s0, 45
+; RV64IM-NEXT: slli a4, s0, 46
+; RV64IM-NEXT: sd a4, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t0, s0, 47
+; RV64IM-NEXT: sd t0, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t1, s0, 50
+; RV64IM-NEXT: sd t1, 192(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t4, s0, 51
+; RV64IM-NEXT: sd t4, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a2, s0, 52
+; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a2, s0, 53
+; RV64IM-NEXT: sd a2, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s1, s0, 54
+; RV64IM-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s2, s0, 55
+; RV64IM-NEXT: sd s2, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli a2, s0, 58
+; RV64IM-NEXT: sd a2, 832(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s9, s0, 59
+; RV64IM-NEXT: sd s9, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli s11, s0, 60
+; RV64IM-NEXT: sd s11, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli ra, s0, 61
+; RV64IM-NEXT: slli s0, s0, 62
+; RV64IM-NEXT: sd s0, 848(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: li a2, -1
+; RV64IM-NEXT: slli a2, a2, 63
+; RV64IM-NEXT: sd a2, 840(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, t6, 1
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: sd a2, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 2
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 4
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 528(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 8
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 576(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 16
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 608(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 32
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 640(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 256
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 512
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 480(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 1024
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 544(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 2048
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: sd a2, 584(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a2, 16384
+; RV64IM-NEXT: and a2, t6, a2
+; RV64IM-NEXT: mul a2, t5, a2
+; RV64IM-NEXT: lui a6, 32768
+; RV64IM-NEXT: and t2, t6, a6
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 448(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a6, 65536
+; RV64IM-NEXT: and t2, t6, a6
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 504(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a6, 131072
+; RV64IM-NEXT: and t2, t6, a6
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 568(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a6, 262144
+; RV64IM-NEXT: and t2, t6, a6
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 592(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t2, t6, t3
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 600(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t2, t6, s10
+; RV64IM-NEXT: mul a6, t5, t2
+; RV64IM-NEXT: sd a6, 624(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t2, t6, a7
+; RV64IM-NEXT: mul t2, t5, t2
+; RV64IM-NEXT: and t3, t6, s3
+; RV64IM-NEXT: mul a6, t5, t3
+; RV64IM-NEXT: sd a6, 416(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t3, t6, s4
+; RV64IM-NEXT: mul a6, t5, t3
+; RV64IM-NEXT: sd a6, 464(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t3, t6, s5
+; RV64IM-NEXT: mul a6, t5, t3
+; RV64IM-NEXT: sd a6, 512(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t3, t6, s6
+; RV64IM-NEXT: mul a6, t5, t3
+; RV64IM-NEXT: sd a6, 560(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and t3, t6, s7
+; RV64IM-NEXT: mul t3, t5, t3
+; RV64IM-NEXT: and s0, t6, s8
+; RV64IM-NEXT: mul a6, t5, s0
+; RV64IM-NEXT: sd a6, 400(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, t6, a1
+; RV64IM-NEXT: mul a6, t5, s0
+; RV64IM-NEXT: sd a6, 432(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, t6, a3
+; RV64IM-NEXT: mv s10, a3
+; RV64IM-NEXT: mul a6, t5, s0
+; RV64IM-NEXT: sd a6, 472(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, t6, a4
+; RV64IM-NEXT: mul a6, t5, s0
+; RV64IM-NEXT: sd a6, 536(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, t6, t0
+; RV64IM-NEXT: mul a6, t5, s0
+; RV64IM-NEXT: sd a6, 552(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s0, t6, t1
+; RV64IM-NEXT: mul s0, t5, s0
+; RV64IM-NEXT: and a6, t6, t4
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: sd a6, 368(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld t1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, t6, t1
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: sd a6, 408(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld t0, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, t6, t0
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: sd a6, 440(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a6, t6, s1
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: sd a6, 496(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a6, t6, s2
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: sd a6, 520(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 832(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, t6, a1
+; RV64IM-NEXT: mul a6, t5, a6
+; RV64IM-NEXT: and a7, t6, s9
+; RV64IM-NEXT: mul a1, t5, a7
+; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, t6, s11
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: sd a7, 392(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and a7, t6, ra
+; RV64IM-NEXT: mv s9, ra
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: sd a7, 424(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a7, t6, a1
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: sd a7, 456(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a7, t6, a1
+; RV64IM-NEXT: mul a7, t5, a7
+; RV64IM-NEXT: sd a7, 488(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 384(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi a4, a4, -1
+; RV64IM-NEXT: and a1, a4, t5
+; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: slli t5, t5, 10
+; RV64IM-NEXT: andi a4, t6, 1024
+; RV64IM-NEXT: seqz a4, a4
+; RV64IM-NEXT: addi a4, a4, -1
+; RV64IM-NEXT: and a4, a4, t5
+; RV64IM-NEXT: sd a4, 384(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a4, 360(sp) # 8-byte Folded Reload
; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 816(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 872(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 464(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 456(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 504(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 584(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 672(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 728(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a3, a1
-; RV64IM-NEXT: sd a1, 784(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a3, a6
-; RV64IM-NEXT: sd a1, 840(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 992(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and s10, s11, a1
-; RV64IM-NEXT: lui a1, 1
-; RV64IM-NEXT: and ra, s11, a1
-; RV64IM-NEXT: lui a1, 2
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 992(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 4
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 8
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 16
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 32
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 64
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 128
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 256
-; RV64IM-NEXT: and a1, s11, a1
+; RV64IM-NEXT: xor a1, a4, a1
; RV64IM-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 512
-; RV64IM-NEXT: and a1, s11, a1
+; RV64IM-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a3, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, a3
; RV64IM-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 1024
-; RV64IM-NEXT: and a1, s11, a1
+; RV64IM-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, a2
+; RV64IM-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, t2
; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 2048
-; RV64IM-NEXT: and a1, s11, a1
+; RV64IM-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, t3
; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 4096
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 8192
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 16384
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 32768
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 65536
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 131072
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: lui a1, 262144
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1016(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1016(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1048(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1048(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1008(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1008(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1032(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1032(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1000(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1000(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1024(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1024(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1040(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1040(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1056(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1056(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1064(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 1064(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1080(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1072(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1088(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, s11, a1
-; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, s9
-; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s8, s11, s8
-; RV64IM-NEXT: and a1, s11, s7
-; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, s6
-; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, s5
-; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, s4
-; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s3, s11, s3
-; RV64IM-NEXT: and a1, s11, s2
-; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and s1, s11, s1
-; RV64IM-NEXT: and t3, s11, t3
-; RV64IM-NEXT: and a1, s11, t2
-; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, t1
-; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, t0
+; RV64IM-NEXT: ld a1, 744(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, s0
+; RV64IM-NEXT: sd a1, 744(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 728(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, a6
+; RV64IM-NEXT: sd a1, 728(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 2
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 1
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 4
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 2
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 296(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 8
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 3
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 288(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 16
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 4
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 32
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 5
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 64
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 6
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 128
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 7
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 256
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 8
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: andi a2, a5, 512
+; RV64IM-NEXT: seqz a2, a2
+; RV64IM-NEXT: addi a2, a2, -1
+; RV64IM-NEXT: slli a3, a0, 9
+; RV64IM-NEXT: and a2, a2, a3
+; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a1, 2
+; RV64IM-NEXT: and a7, a5, a1
+; RV64IM-NEXT: lui a1, 4
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, a5
+; RV64IM-NEXT: lui a1, 8
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a1, 16
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, a4
+; RV64IM-NEXT: lui a1, 32
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: and a1, s11, a2
-; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: andi a1, s11, 2
-; RV64IM-NEXT: andi a2, s11, 1
-; RV64IM-NEXT: andi a3, s11, 4
-; RV64IM-NEXT: andi a4, s11, 8
-; RV64IM-NEXT: andi a5, s11, 16
-; RV64IM-NEXT: andi a6, s11, 32
-; RV64IM-NEXT: andi a7, s11, 64
-; RV64IM-NEXT: andi t0, s11, 128
-; RV64IM-NEXT: andi t1, s11, 256
-; RV64IM-NEXT: andi t2, s11, 512
-; RV64IM-NEXT: andi s2, s11, 1024
-; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: lui a3, 256
+; RV64IM-NEXT: and a3, a5, a3
+; RV64IM-NEXT: lui s1, 512
+; RV64IM-NEXT: and s1, a5, s1
+; RV64IM-NEXT: lui a1, 1024
+; RV64IM-NEXT: and t6, a5, a1
+; RV64IM-NEXT: lui a2, 2048
+; RV64IM-NEXT: and a2, a5, a2
+; RV64IM-NEXT: lui a1, 16384
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a2, a0, a2
-; RV64IM-NEXT: mul a3, a0, a3
-; RV64IM-NEXT: mul a4, a0, a4
-; RV64IM-NEXT: mul a1, a0, a5
-; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, a6
+; RV64IM-NEXT: lui a1, 32768
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: lui a1, 65536
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, a7
-; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, t0
-; RV64IM-NEXT: sd a1, 448(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, t1
-; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, t2
+; RV64IM-NEXT: lui a1, 131072
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, s2
-; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, s10
-; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, ra
-; RV64IM-NEXT: sd a1, 440(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 992(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: lui a1, 262144
+; RV64IM-NEXT: and t4, a5, a1
+; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and ra, a5, a1
+; RV64IM-NEXT: ld s0, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s0, a5, s0
+; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s2, a5, a1
+; RV64IM-NEXT: and s3, a5, s3
+; RV64IM-NEXT: and s4, a5, s4
+; RV64IM-NEXT: and s5, a5, s5
+; RV64IM-NEXT: and s6, a5, s6
+; RV64IM-NEXT: and s7, a5, s7
+; RV64IM-NEXT: and s8, a5, s8
+; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: and s10, a5, s10
+; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and s11, a5, a1
+; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 992(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: and a1, a5, t1
; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: and a1, a5, t0
; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
+; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 832(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a1, a5, a1
; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1072(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1016(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1088(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1048(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s10, a0, a1
-; RV64IM-NEXT: ld a1, 1008(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s9, a0, a1
-; RV64IM-NEXT: ld a1, 1032(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1000(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1024(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1040(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1024(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1056(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1056(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 1064(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t3, a5, a1
+; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t0, a5, a1
+; RV64IM-NEXT: and t5, a5, s9
+; RV64IM-NEXT: ld a1, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and t2, a5, a1
+; RV64IM-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a6, a5, a1
+; RV64IM-NEXT: andi a1, a5, 1
+; RV64IM-NEXT: seqz a4, a1
+; RV64IM-NEXT: mul t1, a0, a7
+; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1080(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s7, a0, a1
-; RV64IM-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s6, a0, a1
-; RV64IM-NEXT: ld a1, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 424(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 840(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a7, a0, a3
+; RV64IM-NEXT: mul a1, a0, s1
+; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, t6
+; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, a2
+; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a3, a0, a1
+; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1008(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1040(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1064(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, t4
+; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, ra
+; RV64IM-NEXT: sd a1, 832(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s0
+; RV64IM-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul t4, a0, s2
+; RV64IM-NEXT: mul ra, a0, s3
+; RV64IM-NEXT: mul a1, a0, s4
+; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s5
+; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s6
+; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul s7, a0, s7
+; RV64IM-NEXT: mul s9, a0, s8
; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul s5, a0, a1
-; RV64IM-NEXT: mul s4, a0, s8
-; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s10
+; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a1, a0, s11
+; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 432(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul a1, a0, s3
-; RV64IM-NEXT: sd a1, 1016(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1048(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: mul s3, a0, s1
-; RV64IM-NEXT: mul s2, a0, t3
-; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a2, a0, a1
+; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul s8, a0, a1
-; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
; RV64IM-NEXT: mul a1, a0, a1
-; RV64IM-NEXT: sd a1, 1000(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: mul a0, a0, a1
-; RV64IM-NEXT: sd a0, 1032(sp) # 8-byte Folded Spill
-; RV64IM-NEXT: ld a0, 640(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s1, 632(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s1, s1, a0
-; RV64IM-NEXT: ld a0, 624(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 616(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: mul a1, a0, a1
+; RV64IM-NEXT: mul s6, a0, t3
+; RV64IM-NEXT: mul s10, a0, t0
+; RV64IM-NEXT: mul t0, a0, t5
+; RV64IM-NEXT: sd t0, 104(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul t0, a0, t2
+; RV64IM-NEXT: sd t0, 152(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: mul a6, a0, a6
+; RV64IM-NEXT: sd a6, 176(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: addi a4, a4, -1
+; RV64IM-NEXT: and s2, a4, a0
+; RV64IM-NEXT: slli a0, a0, 10
+; RV64IM-NEXT: andi a5, a5, 1024
+; RV64IM-NEXT: seqz a5, a5
+; RV64IM-NEXT: addi a5, a5, -1
+; RV64IM-NEXT: and a0, a5, a0
+; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IM-NEXT: ld a0, 768(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s4, a0, t1
+; RV64IM-NEXT: ld a0, 760(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, a0, a7
+; RV64IM-NEXT: ld s0, 752(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s0, a3
+; RV64IM-NEXT: ld a0, 736(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, a0, t4
+; RV64IM-NEXT: ld a0, 720(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s5, a0, s7
+; RV64IM-NEXT: ld a0, 712(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, a0, a2
+; RV64IM-NEXT: ld a0, 704(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s3, a0, a1
+; RV64IM-NEXT: ld a0, 696(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s7, a1, a0
+; RV64IM-NEXT: ld a0, 680(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 672(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t2, a0, a1
+; RV64IM-NEXT: ld a0, 664(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 648(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t3, a0, a1
-; RV64IM-NEXT: ld a0, 608(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, a0, t4
-; RV64IM-NEXT: xor t5, t5, t6
-; RV64IM-NEXT: ld a0, 576(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, s0, a0
-; RV64IM-NEXT: ld a0, 560(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s0, 552(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, a0, s0
-; RV64IM-NEXT: ld a0, 544(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a5, 536(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a0, a5
-; RV64IM-NEXT: ld a0, 520(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 512(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a0, a1
-; RV64IM-NEXT: ld a0, 496(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 488(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a0, a1
+; RV64IM-NEXT: ld a0, 632(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 616(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, a0, a1
+; RV64IM-NEXT: ld a0, 528(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a3, 360(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, a0
; RV64IM-NEXT: ld a0, 480(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, a0, a1
-; RV64IM-NEXT: ld a0, 464(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, a0, a1
-; RV64IM-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, a2, a0
-; RV64IM-NEXT: xor a0, a3, a4
-; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a4, 352(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, a0
+; RV64IM-NEXT: ld a0, 448(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a5, 376(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, a0
+; RV64IM-NEXT: ld a0, 416(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a1, a0
+; RV64IM-NEXT: ld a0, 400(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a1, a0
+; RV64IM-NEXT: ld a0, 368(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 744(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, a1, a0
+; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 728(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, a1, a0
+; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s2, s2, a0
+; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a1, a1, a2
-; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a3, 56(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, a3
-; RV64IM-NEXT: ld a3, 48(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld a4, 40(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: ld a4, 32(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, ra
-; RV64IM-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor ra, ra, s11
-; RV64IM-NEXT: xor s9, s10, s9
-; RV64IM-NEXT: xor s6, s7, s6
+; RV64IM-NEXT: ld a2, 256(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 248(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, s11
+; RV64IM-NEXT: ld s11, 80(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s4, s4, s11
+; RV64IM-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, t6, s11
+; RV64IM-NEXT: ld s11, 48(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s0, s11
+; RV64IM-NEXT: xor s1, s1, ra
+; RV64IM-NEXT: xor s5, s5, s9
+; RV64IM-NEXT: xor t5, t5, s8
+; RV64IM-NEXT: xor s3, s3, s6
+; RV64IM-NEXT: xor t2, s7, t2
+; RV64IM-NEXT: ld s6, 688(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, t3, s6
+; RV64IM-NEXT: ld s6, 656(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t4, s6
+; RV64IM-NEXT: ld s6, 576(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, s6
+; RV64IM-NEXT: ld s6, 544(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, s6
+; RV64IM-NEXT: ld s6, 504(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, s6
+; RV64IM-NEXT: ld s6, 464(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, s6
+; RV64IM-NEXT: ld s6, 432(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, s6
+; RV64IM-NEXT: ld s6, 408(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, s6
+; RV64IM-NEXT: ld s6, 392(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, s6
+; RV64IM-NEXT: xor a0, s2, a0
+; RV64IM-NEXT: ld s2, 304(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a1, s2
+; RV64IM-NEXT: ld s2, 272(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, s2
+; RV64IM-NEXT: ld s2, 144(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s2, s4, s2
+; RV64IM-NEXT: ld s4, 128(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, t6, s4
+; RV64IM-NEXT: ld s4, 112(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s0, s4
+; RV64IM-NEXT: ld s4, 96(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, s1, s4
+; RV64IM-NEXT: ld s4, 88(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor s4, s5, s4
-; RV64IM-NEXT: xor s2, s3, s2
-; RV64IM-NEXT: xor t3, s1, t3
-; RV64IM-NEXT: ld s1, 712(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, t4, s1
-; RV64IM-NEXT: ld s1, 696(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t5, s1
-; RV64IM-NEXT: ld s1, 680(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, t6, s1
-; RV64IM-NEXT: ld s1, 664(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s0, s1
-; RV64IM-NEXT: ld s1, 648(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, s1
-; RV64IM-NEXT: ld s1, 592(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, s1
-; RV64IM-NEXT: ld s1, 568(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, s1
-; RV64IM-NEXT: ld s1, 528(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t0, s1
-; RV64IM-NEXT: ld s1, 504(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t1, s1
-; RV64IM-NEXT: xor a0, t2, a0
-; RV64IM-NEXT: ld t2, 168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, a1, t2
-; RV64IM-NEXT: ld t2, 160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, t2
-; RV64IM-NEXT: ld t2, 144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, t2
-; RV64IM-NEXT: ld t2, 120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, t2
-; RV64IM-NEXT: ld t2, 320(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, ra, t2
-; RV64IM-NEXT: ld s1, 304(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s1, s9, s1
-; RV64IM-NEXT: ld s3, 288(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s3, s6, s3
-; RV64IM-NEXT: ld s5, 280(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s4, s4, s5
-; RV64IM-NEXT: xor s2, s2, s8
-; RV64IM-NEXT: xor t3, t3, t4
-; RV64IM-NEXT: ld t4, 776(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, t5, t4
-; RV64IM-NEXT: ld t5, 752(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t6, t5
-; RV64IM-NEXT: ld t6, 744(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, s0, t6
-; RV64IM-NEXT: ld s0, 720(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, s0
-; RV64IM-NEXT: ld s0, 688(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, s0
-; RV64IM-NEXT: ld s0, 656(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, s0
-; RV64IM-NEXT: ld s0, 600(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t0, s0
-; RV64IM-NEXT: ld s0, 584(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t1, s0
-; RV64IM-NEXT: xor a0, a0, a1
-; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, a1
-; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, a1
-; RV64IM-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, a1
-; RV64IM-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, a1
-; RV64IM-NEXT: ld s0, 344(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s1, s0
-; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s1, s3, a1
-; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s3, s4, a1
-; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s2, a1
-; RV64IM-NEXT: ld a1, 880(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a1, t3, a1
-; RV64IM-NEXT: ld t3, 856(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t5, s5
+; RV64IM-NEXT: xor s3, s3, s10
+; RV64IM-NEXT: xor t2, t2, t3
+; RV64IM-NEXT: ld t3, 384(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t3, t4, t3
-; RV64IM-NEXT: ld t4, 832(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, t5, t4
-; RV64IM-NEXT: ld t5, 808(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, t6, t5
-; RV64IM-NEXT: ld t6, 800(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, t6
-; RV64IM-NEXT: ld t6, 768(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, t6
-; RV64IM-NEXT: ld t6, 736(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t6
-; RV64IM-NEXT: ld t6, 704(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t0, t6
-; RV64IM-NEXT: ld t6, 672(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t1, t6
-; RV64IM-NEXT: ld t6, 448(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a0, a0, t6
-; RV64IM-NEXT: ld t6, 440(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a2, a2, t6
-; RV64IM-NEXT: ld t6, 400(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, t6
-; RV64IM-NEXT: ld t6, 392(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, a4, t6
-; RV64IM-NEXT: ld t6, 384(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, t6
-; RV64IM-NEXT: ld t6, 376(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, s0, t6
-; RV64IM-NEXT: ld s0, 424(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s1, s0
-; RV64IM-NEXT: ld s1, 352(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s1, s3, s1
-; RV64IM-NEXT: ld s3, 336(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s2, s3
-; RV64IM-NEXT: ld s3, 904(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, t4, s3
-; RV64IM-NEXT: ld s3, 864(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, s3
-; RV64IM-NEXT: ld s3, 824(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, s3
-; RV64IM-NEXT: ld s3, 792(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, s3
-; RV64IM-NEXT: ld s3, 760(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t0, s3
-; RV64IM-NEXT: ld s3, 728(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t1, s3
-; RV64IM-NEXT: ld s3, 992(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, s3
-; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t2, s3
-; RV64IM-NEXT: ld s3, 1024(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t6, t6, s3
-; RV64IM-NEXT: ld s3, 1008(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s0, s0, s3
-; RV64IM-NEXT: ld s3, 432(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s1, s1, s3
-; RV64IM-NEXT: ld s3, 416(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor s2, s2, s3
-; RV64IM-NEXT: xor t3, a1, t3
-; RV64IM-NEXT: xor t3, t3, t4
-; RV64IM-NEXT: ld t4, 912(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t4, 608(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t4
+; RV64IM-NEXT: ld t4, 584(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a4, t4
+; RV64IM-NEXT: ld t4, 568(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a5, a5, t4
-; RV64IM-NEXT: ld t4, 888(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t4, 512(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a6, a6, t4
-; RV64IM-NEXT: ld t4, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t4, 472(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor a7, a7, t4
-; RV64IM-NEXT: ld t4, 816(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t4, 440(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t0, t0, t4
-; RV64IM-NEXT: ld t4, 784(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld t4, 424(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t1, t1, t4
-; RV64IM-NEXT: xor a2, a0, a2
-; RV64IM-NEXT: xor a2, a2, a3
-; RV64IM-NEXT: ld a3, 1072(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, t2, a3
-; RV64IM-NEXT: ld t2, 1056(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t6, t2
-; RV64IM-NEXT: ld t4, 1040(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, s0, t4
-; RV64IM-NEXT: ld t6, 1016(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 224(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, s2, a2
+; RV64IM-NEXT: ld t4, 200(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t4, t6, t4
+; RV64IM-NEXT: ld t6, 184(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, s0, t6
+; RV64IM-NEXT: ld s0, 160(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s1, s0
+; RV64IM-NEXT: ld s1, 136(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s1, s4, s1
+; RV64IM-NEXT: ld s2, 120(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t5, s2
+; RV64IM-NEXT: ld s2, 104(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s2, s3, s2
+; RV64IM-NEXT: xor t2, t2, t3
+; RV64IM-NEXT: ld t3, 640(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a3, a3, t3
+; RV64IM-NEXT: ld t3, 592(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, t3
+; RV64IM-NEXT: ld t3, 560(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a6, a6, t3
+; RV64IM-NEXT: ld t3, 536(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, t3
+; RV64IM-NEXT: ld t3, 496(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, t3
+; RV64IM-NEXT: ld t3, 456(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, t3
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ld a1, 840(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a1, a2, a1
+; RV64IM-NEXT: ld a2, 240(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, t6, a2
+; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t3, s0, t3
+; RV64IM-NEXT: ld t6, 192(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor t6, s1, t6
-; RV64IM-NEXT: ld s0, 1000(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s0, 168(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t5, s0
+; RV64IM-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64IM-NEXT: xor s0, s2, s0
-; RV64IM-NEXT: xor t3, t3, t5
-; RV64IM-NEXT: ld t5, 928(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a5, a5, t5
-; RV64IM-NEXT: ld t5, 920(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a6, a6, t5
-; RV64IM-NEXT: ld t5, 896(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a7, a7, t5
-; RV64IM-NEXT: ld t5, 872(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t0, t0, t5
-; RV64IM-NEXT: ld t5, 840(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t1, t1, t5
-; RV64IM-NEXT: xor a2, a2, a4
-; RV64IM-NEXT: ld a4, 1088(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: ld a4, 1080(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor a4, t2, a4
-; RV64IM-NEXT: ld t2, 1064(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t2, t4, t2
-; RV64IM-NEXT: ld t4, 1048(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t4, t6, t4
-; RV64IM-NEXT: ld t5, 1032(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: xor t5, s0, t5
-; RV64IM-NEXT: xor a5, t3, a5
-; RV64IM-NEXT: xor a5, a5, a6
-; RV64IM-NEXT: xor a3, a2, a3
+; RV64IM-NEXT: ld s1, 600(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a5, a5, s1
+; RV64IM-NEXT: ld s1, 552(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a7, a7, s1
+; RV64IM-NEXT: ld s1, 520(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t0, t0, s1
+; RV64IM-NEXT: ld s1, 488(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t1, t1, s1
+; RV64IM-NEXT: ld s1, 832(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, s1
+; RV64IM-NEXT: ld s1, 232(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t6, t6, s1
+; RV64IM-NEXT: ld s1, 208(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor t5, t5, s1
+; RV64IM-NEXT: ld s1, 176(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor s0, s0, s1
+; RV64IM-NEXT: xor a3, t2, a3
; RV64IM-NEXT: xor a3, a3, a4
-; RV64IM-NEXT: slli a1, a1, 56
+; RV64IM-NEXT: ld a4, 624(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a4, a5, a4
+; RV64IM-NEXT: xor a1, a0, a1
+; RV64IM-NEXT: xor a1, a1, t4
+; RV64IM-NEXT: ld a5, 848(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: xor a2, a2, a5
+; RV64IM-NEXT: xor a4, a3, a4
+; RV64IM-NEXT: xor a4, a4, a6
+; RV64IM-NEXT: xor a2, a1, a2
+; RV64IM-NEXT: xor a2, a2, t3
+; RV64IM-NEXT: slli t2, t2, 56
; RV64IM-NEXT: slli a0, a0, 56
-; RV64IM-NEXT: ld t6, 952(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a4, t3, t6
-; RV64IM-NEXT: and a2, a2, t6
-; RV64IM-NEXT: slli a4, a4, 40
-; RV64IM-NEXT: slli a2, a2, 40
-; RV64IM-NEXT: or a1, a1, a4
-; RV64IM-NEXT: srli a4, a5, 8
-; RV64IM-NEXT: or a0, a0, a2
-; RV64IM-NEXT: srli a2, a3, 8
-; RV64IM-NEXT: ld a6, 968(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a4, a4, a6
-; RV64IM-NEXT: and a2, a2, a6
-; RV64IM-NEXT: xor a6, a5, a7
+; RV64IM-NEXT: ld t4, 784(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a3, a3, t4
+; RV64IM-NEXT: and a1, a1, t4
+; RV64IM-NEXT: slli a3, a3, 40
+; RV64IM-NEXT: slli a1, a1, 40
+; RV64IM-NEXT: or a3, t2, a3
+; RV64IM-NEXT: srli a5, a4, 8
+; RV64IM-NEXT: or a0, a0, a1
+; RV64IM-NEXT: srli a1, a2, 8
+; RV64IM-NEXT: ld a6, 808(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a5, a5, a6
+; RV64IM-NEXT: and a1, a1, a6
+; RV64IM-NEXT: xor a6, a4, a7
; RV64IM-NEXT: lui t3, 4080
-; RV64IM-NEXT: and a5, a5, t3
-; RV64IM-NEXT: xor a7, a3, t2
-; RV64IM-NEXT: and a3, a3, t3
+; RV64IM-NEXT: and a4, a4, t3
+; RV64IM-NEXT: xor a7, a2, t6
+; RV64IM-NEXT: and a2, a2, t3
; RV64IM-NEXT: xor t0, a6, t0
; RV64IM-NEXT: srli a6, a6, 24
-; RV64IM-NEXT: xor t2, a7, t4
+; RV64IM-NEXT: xor t2, a7, t5
; RV64IM-NEXT: srli a7, a7, 24
; RV64IM-NEXT: and a6, a6, t3
; RV64IM-NEXT: and a7, a7, t3
-; RV64IM-NEXT: or a4, a4, a6
+; RV64IM-NEXT: or a5, a5, a6
; RV64IM-NEXT: srli a6, t0, 40
-; RV64IM-NEXT: or a2, a2, a7
+; RV64IM-NEXT: or a1, a1, a7
; RV64IM-NEXT: srli a7, t2, 40
-; RV64IM-NEXT: and a6, a6, t6
-; RV64IM-NEXT: and a7, a7, t6
-; RV64IM-NEXT: slli a5, a5, 24
+; RV64IM-NEXT: and a6, a6, t4
+; RV64IM-NEXT: and a7, a7, t4
+; RV64IM-NEXT: slli a4, a4, 24
; RV64IM-NEXT: xor t1, t0, t1
; RV64IM-NEXT: srliw t0, t0, 24
; RV64IM-NEXT: slli t0, t0, 32
-; RV64IM-NEXT: or a5, a5, t0
+; RV64IM-NEXT: or a4, a4, t0
; RV64IM-NEXT: srli t0, t1, 56
; RV64IM-NEXT: or a6, a6, t0
-; RV64IM-NEXT: slli a3, a3, 24
-; RV64IM-NEXT: xor t0, t2, t5
+; RV64IM-NEXT: slli a2, a2, 24
+; RV64IM-NEXT: xor t0, t2, s0
; RV64IM-NEXT: srliw t1, t2, 24
; RV64IM-NEXT: slli t1, t1, 32
-; RV64IM-NEXT: or a3, a3, t1
+; RV64IM-NEXT: or a2, a2, t1
; RV64IM-NEXT: srli t0, t0, 56
; RV64IM-NEXT: or a7, a7, t0
-; RV64IM-NEXT: or a1, a1, a5
-; RV64IM-NEXT: or a4, a4, a6
-; RV64IM-NEXT: or a0, a0, a3
-; RV64IM-NEXT: or a2, a2, a7
-; RV64IM-NEXT: or a1, a1, a4
+; RV64IM-NEXT: or a3, a3, a4
+; RV64IM-NEXT: or a4, a5, a6
; RV64IM-NEXT: or a0, a0, a2
-; RV64IM-NEXT: srli a2, a1, 4
-; RV64IM-NEXT: ld a4, 960(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: and a1, a1, a4
+; RV64IM-NEXT: or a1, a1, a7
+; RV64IM-NEXT: or a3, a3, a4
+; RV64IM-NEXT: or a0, a0, a1
+; RV64IM-NEXT: srli a1, a3, 4
+; RV64IM-NEXT: ld a4, 800(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: and a2, a3, a4
; RV64IM-NEXT: srli a3, a0, 4
; RV64IM-NEXT: and a0, a0, a4
-; RV64IM-NEXT: and a2, a2, a4
+; RV64IM-NEXT: and a1, a1, a4
; RV64IM-NEXT: and a3, a3, a4
-; RV64IM-NEXT: slli a1, a1, 4
+; RV64IM-NEXT: slli a2, a2, 4
; RV64IM-NEXT: slli a0, a0, 4
-; RV64IM-NEXT: or a1, a2, a1
+; RV64IM-NEXT: or a1, a1, a2
; RV64IM-NEXT: or a0, a3, a0
; RV64IM-NEXT: srli a2, a1, 2
-; RV64IM-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a4, 792(sp) # 8-byte Folded Reload
; RV64IM-NEXT: and a1, a1, a4
; RV64IM-NEXT: srli a3, a0, 2
; RV64IM-NEXT: and a0, a0, a4
@@ -10614,7 +9725,7 @@ define void @commutative_clmulr_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %
; RV64IM-NEXT: or a1, a2, a1
; RV64IM-NEXT: or a0, a3, a0
; RV64IM-NEXT: srli a2, a1, 1
-; RV64IM-NEXT: ld a4, 936(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a4, 776(sp) # 8-byte Folded Reload
; RV64IM-NEXT: and a1, a1, a4
; RV64IM-NEXT: srli a3, a0, 1
; RV64IM-NEXT: and a0, a0, a4
@@ -10624,26 +9735,26 @@ define void @commutative_clmulr_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %
; RV64IM-NEXT: or a1, a2, a1
; RV64IM-NEXT: slli a0, a0, 1
; RV64IM-NEXT: or a0, a3, a0
-; RV64IM-NEXT: ld a2, 976(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 816(sp) # 8-byte Folded Reload
; RV64IM-NEXT: sd a1, 0(a2)
; RV64IM-NEXT: sd a0, 8(a2)
-; RV64IM-NEXT: ld a2, 984(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld a2, 824(sp) # 8-byte Folded Reload
; RV64IM-NEXT: sd a1, 0(a2)
; RV64IM-NEXT: sd a0, 8(a2)
-; RV64IM-NEXT: ld ra, 1192(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s0, 1184(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s1, 1176(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s2, 1168(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s3, 1160(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s4, 1152(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s5, 1144(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s6, 1136(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s7, 1128(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s8, 1120(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s9, 1112(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s10, 1104(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: ld s11, 1096(sp) # 8-byte Folded Reload
-; RV64IM-NEXT: addi sp, sp, 1200
+; RV64IM-NEXT: ld ra, 952(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s0, 944(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s1, 936(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s2, 928(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s3, 920(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s4, 912(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s5, 904(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s6, 896(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s7, 888(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s8, 880(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s9, 872(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s10, 864(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: ld s11, 856(sp) # 8-byte Folded Reload
+; RV64IM-NEXT: addi sp, sp, 960
; RV64IM-NEXT: ret
;
; RV32IMZBS-LABEL: commutative_clmulr_v2i64:
@@ -10662,2135 +9773,2236 @@ define void @commutative_clmulr_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %
; RV32IMZBS-NEXT: sw s9, 756(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: sw s10, 752(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: sw s11, 748(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw a3, 680(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: sw a2, 676(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a5, 0(a0)
-; RV32IMZBS-NEXT: lw s1, 4(a0)
-; RV32IMZBS-NEXT: lw t1, 8(a0)
-; RV32IMZBS-NEXT: lw a2, 12(a0)
-; RV32IMZBS-NEXT: lw t6, 0(a1)
-; RV32IMZBS-NEXT: lw s3, 4(a1)
-; RV32IMZBS-NEXT: lw t3, 8(a1)
-; RV32IMZBS-NEXT: lw a1, 12(a1)
+; RV32IMZBS-NEXT: sw a3, 736(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw a2, 732(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s1, 0(a0)
+; RV32IMZBS-NEXT: lw a3, 4(a0)
+; RV32IMZBS-NEXT: lw a7, 8(a0)
+; RV32IMZBS-NEXT: lw t1, 12(a0)
+; RV32IMZBS-NEXT: lw s7, 0(a1)
+; RV32IMZBS-NEXT: lw a5, 4(a1)
+; RV32IMZBS-NEXT: lw t0, 8(a1)
+; RV32IMZBS-NEXT: lw t2, 12(a1)
; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: lui t2, 61681
-; RV32IMZBS-NEXT: lui a7, 209715
-; RV32IMZBS-NEXT: lui s11, 349525
-; RV32IMZBS-NEXT: bseti a3, zero, 11
-; RV32IMZBS-NEXT: sw a3, 740(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: addi s10, a0, -256
-; RV32IMZBS-NEXT: srli a0, s3, 8
-; RV32IMZBS-NEXT: srli s2, s3, 24
-; RV32IMZBS-NEXT: and a3, s3, s10
-; RV32IMZBS-NEXT: slli s3, s3, 24
-; RV32IMZBS-NEXT: srli a4, s1, 8
-; RV32IMZBS-NEXT: srli s0, s1, 24
-; RV32IMZBS-NEXT: and a6, s1, s10
+; RV32IMZBS-NEXT: lui a1, 61681
+; RV32IMZBS-NEXT: lui a2, 209715
+; RV32IMZBS-NEXT: lui a4, 349525
+; RV32IMZBS-NEXT: addi ra, a0, -256
+; RV32IMZBS-NEXT: addi s11, a1, -241
+; RV32IMZBS-NEXT: addi s10, a2, 819
+; RV32IMZBS-NEXT: addi s5, a4, 1365
+; RV32IMZBS-NEXT: srli s0, a5, 8
+; RV32IMZBS-NEXT: srli a2, a5, 24
+; RV32IMZBS-NEXT: and a4, a5, ra
+; RV32IMZBS-NEXT: slli a5, a5, 24
+; RV32IMZBS-NEXT: srli t3, a3, 8
+; RV32IMZBS-NEXT: srli t4, a3, 24
+; RV32IMZBS-NEXT: and a0, a3, ra
+; RV32IMZBS-NEXT: slli a1, a3, 24
+; RV32IMZBS-NEXT: srli s8, s7, 8
+; RV32IMZBS-NEXT: srli s9, s7, 24
+; RV32IMZBS-NEXT: and s6, s7, ra
+; RV32IMZBS-NEXT: slli s7, s7, 24
+; RV32IMZBS-NEXT: srli a3, s1, 8
+; RV32IMZBS-NEXT: srli t5, s1, 24
+; RV32IMZBS-NEXT: and a6, s1, ra
; RV32IMZBS-NEXT: slli s1, s1, 24
-; RV32IMZBS-NEXT: srli t0, t6, 8
-; RV32IMZBS-NEXT: srli t5, t6, 24
-; RV32IMZBS-NEXT: and t4, t6, s10
-; RV32IMZBS-NEXT: slli t6, t6, 24
-; RV32IMZBS-NEXT: and a0, a0, s10
-; RV32IMZBS-NEXT: or a0, a0, s2
-; RV32IMZBS-NEXT: srli s2, a5, 8
-; RV32IMZBS-NEXT: slli a3, a3, 8
-; RV32IMZBS-NEXT: or a3, s3, a3
-; RV32IMZBS-NEXT: srli s4, a5, 24
-; RV32IMZBS-NEXT: and a4, a4, s10
-; RV32IMZBS-NEXT: or a4, a4, s0
-; RV32IMZBS-NEXT: and s0, a5, s10
-; RV32IMZBS-NEXT: slli s3, a5, 24
-; RV32IMZBS-NEXT: slli a5, a6, 8
-; RV32IMZBS-NEXT: or a5, s1, a5
-; RV32IMZBS-NEXT: srli s5, a1, 8
-; RV32IMZBS-NEXT: and a6, t0, s10
-; RV32IMZBS-NEXT: or t0, a6, t5
-; RV32IMZBS-NEXT: srli s7, a1, 24
-; RV32IMZBS-NEXT: slli t4, t4, 8
-; RV32IMZBS-NEXT: or t4, t6, t4
-; RV32IMZBS-NEXT: and s6, a1, s10
-; RV32IMZBS-NEXT: slli s8, a1, 24
-; RV32IMZBS-NEXT: and a1, s2, s10
-; RV32IMZBS-NEXT: or a1, a1, s4
-; RV32IMZBS-NEXT: srli a6, a2, 8
-; RV32IMZBS-NEXT: slli s0, s0, 8
-; RV32IMZBS-NEXT: or t5, s3, s0
-; RV32IMZBS-NEXT: srli s1, a2, 24
-; RV32IMZBS-NEXT: and t6, s5, s10
-; RV32IMZBS-NEXT: or t6, t6, s7
-; RV32IMZBS-NEXT: and s0, a2, s10
-; RV32IMZBS-NEXT: slli s3, a2, 24
+; RV32IMZBS-NEXT: srli s3, t2, 8
+; RV32IMZBS-NEXT: and t6, s0, ra
+; RV32IMZBS-NEXT: or a2, t6, a2
+; RV32IMZBS-NEXT: srli s4, t2, 24
+; RV32IMZBS-NEXT: slli a4, a4, 8
+; RV32IMZBS-NEXT: or a4, a5, a4
+; RV32IMZBS-NEXT: and a5, t2, ra
+; RV32IMZBS-NEXT: slli s0, t2, 24
+; RV32IMZBS-NEXT: and t2, t3, ra
+; RV32IMZBS-NEXT: or t2, t2, t4
+; RV32IMZBS-NEXT: srli t3, t1, 8
+; RV32IMZBS-NEXT: slli a0, a0, 8
+; RV32IMZBS-NEXT: or a0, a1, a0
+; RV32IMZBS-NEXT: srli s2, t1, 24
+; RV32IMZBS-NEXT: and a1, s8, ra
+; RV32IMZBS-NEXT: or a1, a1, s9
+; RV32IMZBS-NEXT: and t4, t1, ra
+; RV32IMZBS-NEXT: slli t6, t1, 24
; RV32IMZBS-NEXT: slli s6, s6, 8
-; RV32IMZBS-NEXT: or a2, s8, s6
-; RV32IMZBS-NEXT: srli s4, t3, 8
-; RV32IMZBS-NEXT: and a6, a6, s10
-; RV32IMZBS-NEXT: or a6, a6, s1
-; RV32IMZBS-NEXT: srli s5, t3, 24
-; RV32IMZBS-NEXT: slli s0, s0, 8
-; RV32IMZBS-NEXT: or s1, s3, s0
-; RV32IMZBS-NEXT: and s2, t3, s10
-; RV32IMZBS-NEXT: slli t3, t3, 24
-; RV32IMZBS-NEXT: and s0, s4, s10
-; RV32IMZBS-NEXT: or s0, s0, s5
-; RV32IMZBS-NEXT: srli s3, t1, 8
+; RV32IMZBS-NEXT: or t1, s7, s6
+; RV32IMZBS-NEXT: srli s6, t0, 8
+; RV32IMZBS-NEXT: and a3, a3, ra
+; RV32IMZBS-NEXT: or a3, a3, t5
+; RV32IMZBS-NEXT: srli t5, t0, 24
+; RV32IMZBS-NEXT: slli a6, a6, 8
+; RV32IMZBS-NEXT: or a6, s1, a6
+; RV32IMZBS-NEXT: and s7, t0, ra
+; RV32IMZBS-NEXT: slli s1, t0, 24
+; RV32IMZBS-NEXT: and t0, s3, ra
+; RV32IMZBS-NEXT: or t0, t0, s4
+; RV32IMZBS-NEXT: srli s3, a7, 8
+; RV32IMZBS-NEXT: slli a5, a5, 8
+; RV32IMZBS-NEXT: or a5, s0, a5
+; RV32IMZBS-NEXT: srli s0, a7, 24
+; RV32IMZBS-NEXT: and t3, t3, ra
+; RV32IMZBS-NEXT: or t3, t3, s2
+; RV32IMZBS-NEXT: and s2, a7, ra
+; RV32IMZBS-NEXT: slli a7, a7, 24
+; RV32IMZBS-NEXT: slli t4, t4, 8
+; RV32IMZBS-NEXT: and s4, s6, ra
+; RV32IMZBS-NEXT: slli s7, s7, 8
+; RV32IMZBS-NEXT: and s3, s3, ra
; RV32IMZBS-NEXT: slli s2, s2, 8
-; RV32IMZBS-NEXT: or s2, t3, s2
-; RV32IMZBS-NEXT: srli t3, t1, 24
-; RV32IMZBS-NEXT: and s3, s3, s10
-; RV32IMZBS-NEXT: or s3, s3, t3
-; RV32IMZBS-NEXT: and t3, t1, s10
-; RV32IMZBS-NEXT: sw s10, 744(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli t1, t1, 24
-; RV32IMZBS-NEXT: slli t3, t3, 8
-; RV32IMZBS-NEXT: or s4, t1, t3
-; RV32IMZBS-NEXT: or a0, a3, a0
-; RV32IMZBS-NEXT: or a4, a5, a4
-; RV32IMZBS-NEXT: or a3, t4, t0
-; RV32IMZBS-NEXT: or a1, t5, a1
-; RV32IMZBS-NEXT: or a2, a2, t6
-; RV32IMZBS-NEXT: addi t3, t2, -241
-; RV32IMZBS-NEXT: sw t3, 728(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: addi t2, a7, 819
-; RV32IMZBS-NEXT: or a5, s1, a6
-; RV32IMZBS-NEXT: addi t1, s11, 1365
-; RV32IMZBS-NEXT: or a6, s2, s0
-; RV32IMZBS-NEXT: or a7, s4, s3
-; RV32IMZBS-NEXT: srli t0, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, t3
+; RV32IMZBS-NEXT: or t4, t6, t4
+; RV32IMZBS-NEXT: or t5, s4, t5
+; RV32IMZBS-NEXT: or t6, s1, s7
+; RV32IMZBS-NEXT: or s0, s3, s0
+; RV32IMZBS-NEXT: or a7, a7, s2
+; RV32IMZBS-NEXT: or a2, a4, a2
+; RV32IMZBS-NEXT: or a0, a0, t2
+; RV32IMZBS-NEXT: or a1, t1, a1
+; RV32IMZBS-NEXT: or a3, a6, a3
+; RV32IMZBS-NEXT: or a4, a5, t0
+; RV32IMZBS-NEXT: or a5, t4, t3
+; RV32IMZBS-NEXT: or a6, t6, t5
+; RV32IMZBS-NEXT: or a7, a7, s0
+; RV32IMZBS-NEXT: srli t0, a2, 4
+; RV32IMZBS-NEXT: sw s11, 744(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, s11
+; RV32IMZBS-NEXT: srli t1, a0, 4
+; RV32IMZBS-NEXT: and a0, a0, s11
+; RV32IMZBS-NEXT: srli t2, a1, 4
+; RV32IMZBS-NEXT: and a1, a1, s11
+; RV32IMZBS-NEXT: srli t3, a3, 4
+; RV32IMZBS-NEXT: and a3, a3, s11
; RV32IMZBS-NEXT: srli t4, a4, 4
-; RV32IMZBS-NEXT: and a4, a4, t3
-; RV32IMZBS-NEXT: and t0, t0, t3
+; RV32IMZBS-NEXT: and a4, a4, s11
+; RV32IMZBS-NEXT: srli t5, a5, 4
+; RV32IMZBS-NEXT: and a5, a5, s11
+; RV32IMZBS-NEXT: srli t6, a6, 4
+; RV32IMZBS-NEXT: and a6, a6, s11
+; RV32IMZBS-NEXT: srli s0, a7, 4
+; RV32IMZBS-NEXT: and a7, a7, s11
+; RV32IMZBS-NEXT: and t0, t0, s11
+; RV32IMZBS-NEXT: slli a2, a2, 4
+; RV32IMZBS-NEXT: and t1, t1, s11
; RV32IMZBS-NEXT: slli a0, a0, 4
-; RV32IMZBS-NEXT: or a0, t0, a0
-; RV32IMZBS-NEXT: srli t0, a3, 4
-; RV32IMZBS-NEXT: and a3, a3, t3
-; RV32IMZBS-NEXT: and t4, t4, t3
-; RV32IMZBS-NEXT: slli a4, a4, 4
-; RV32IMZBS-NEXT: or a4, t4, a4
-; RV32IMZBS-NEXT: srli t4, a1, 4
-; RV32IMZBS-NEXT: and a1, a1, t3
-; RV32IMZBS-NEXT: and t0, t0, t3
-; RV32IMZBS-NEXT: slli a3, a3, 4
-; RV32IMZBS-NEXT: or a3, t0, a3
-; RV32IMZBS-NEXT: srli t0, a2, 4
-; RV32IMZBS-NEXT: and a2, a2, t3
-; RV32IMZBS-NEXT: and t4, t4, t3
+; RV32IMZBS-NEXT: and t2, t2, s11
; RV32IMZBS-NEXT: slli a1, a1, 4
-; RV32IMZBS-NEXT: or a1, t4, a1
-; RV32IMZBS-NEXT: srli t4, a5, 4
-; RV32IMZBS-NEXT: and a5, a5, t3
-; RV32IMZBS-NEXT: and t0, t0, t3
-; RV32IMZBS-NEXT: slli a2, a2, 4
-; RV32IMZBS-NEXT: or a2, t0, a2
-; RV32IMZBS-NEXT: srli t0, a6, 4
-; RV32IMZBS-NEXT: and a6, a6, t3
-; RV32IMZBS-NEXT: and t4, t4, t3
+; RV32IMZBS-NEXT: and t3, t3, s11
+; RV32IMZBS-NEXT: slli a3, a3, 4
+; RV32IMZBS-NEXT: and t4, t4, s11
+; RV32IMZBS-NEXT: slli a4, a4, 4
+; RV32IMZBS-NEXT: and t5, t5, s11
; RV32IMZBS-NEXT: slli a5, a5, 4
-; RV32IMZBS-NEXT: or a5, t4, a5
-; RV32IMZBS-NEXT: srli t4, a7, 4
-; RV32IMZBS-NEXT: and a7, a7, t3
-; RV32IMZBS-NEXT: and t0, t0, t3
+; RV32IMZBS-NEXT: and t6, t6, s11
; RV32IMZBS-NEXT: slli a6, a6, 4
-; RV32IMZBS-NEXT: and t4, t4, t3
+; RV32IMZBS-NEXT: and s0, s0, s11
; RV32IMZBS-NEXT: slli a7, a7, 4
-; RV32IMZBS-NEXT: or a6, t0, a6
-; RV32IMZBS-NEXT: or a7, t4, a7
-; RV32IMZBS-NEXT: srli t0, a0, 2
-; RV32IMZBS-NEXT: sw t2, 732(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, a0, t2
-; RV32IMZBS-NEXT: srli t4, a4, 2
-; RV32IMZBS-NEXT: and a4, a4, t2
-; RV32IMZBS-NEXT: and t0, t0, t2
-; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: or a0, t0, a0
-; RV32IMZBS-NEXT: srli t0, a3, 2
-; RV32IMZBS-NEXT: and a3, a3, t2
-; RV32IMZBS-NEXT: and t4, t4, t2
-; RV32IMZBS-NEXT: slli a4, a4, 2
+; RV32IMZBS-NEXT: or a2, t0, a2
+; RV32IMZBS-NEXT: or a0, t1, a0
+; RV32IMZBS-NEXT: or a1, t2, a1
+; RV32IMZBS-NEXT: or a3, t3, a3
; RV32IMZBS-NEXT: or a4, t4, a4
-; RV32IMZBS-NEXT: srli t4, a1, 2
-; RV32IMZBS-NEXT: and a1, a1, t2
-; RV32IMZBS-NEXT: and t0, t0, t2
-; RV32IMZBS-NEXT: slli a3, a3, 2
-; RV32IMZBS-NEXT: or a3, t0, a3
+; RV32IMZBS-NEXT: or a5, t5, a5
+; RV32IMZBS-NEXT: or a6, t6, a6
+; RV32IMZBS-NEXT: or a7, s0, a7
; RV32IMZBS-NEXT: srli t0, a2, 2
-; RV32IMZBS-NEXT: and a2, a2, t2
-; RV32IMZBS-NEXT: and t4, t4, t2
-; RV32IMZBS-NEXT: slli a1, a1, 2
-; RV32IMZBS-NEXT: or a1, t4, a1
-; RV32IMZBS-NEXT: srli t4, a5, 2
-; RV32IMZBS-NEXT: and a5, a5, t2
-; RV32IMZBS-NEXT: and t0, t0, t2
+; RV32IMZBS-NEXT: sw s10, 728(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, s10
+; RV32IMZBS-NEXT: srli t1, a0, 2
+; RV32IMZBS-NEXT: and a0, a0, s10
+; RV32IMZBS-NEXT: srli t2, a1, 2
+; RV32IMZBS-NEXT: and a1, a1, s10
+; RV32IMZBS-NEXT: srli t3, a3, 2
+; RV32IMZBS-NEXT: and a3, a3, s10
+; RV32IMZBS-NEXT: srli t4, a4, 2
+; RV32IMZBS-NEXT: and a4, a4, s10
+; RV32IMZBS-NEXT: srli t5, a5, 2
+; RV32IMZBS-NEXT: and a5, a5, s10
+; RV32IMZBS-NEXT: srli t6, a6, 2
+; RV32IMZBS-NEXT: and a6, a6, s10
+; RV32IMZBS-NEXT: srli s0, a7, 2
+; RV32IMZBS-NEXT: and a7, a7, s10
+; RV32IMZBS-NEXT: and t0, t0, s10
; RV32IMZBS-NEXT: slli a2, a2, 2
-; RV32IMZBS-NEXT: or t0, t0, a2
-; RV32IMZBS-NEXT: srli a2, a6, 2
-; RV32IMZBS-NEXT: and a6, a6, t2
-; RV32IMZBS-NEXT: and t4, t4, t2
+; RV32IMZBS-NEXT: and t1, t1, s10
+; RV32IMZBS-NEXT: slli a0, a0, 2
+; RV32IMZBS-NEXT: and t2, t2, s10
+; RV32IMZBS-NEXT: slli a1, a1, 2
+; RV32IMZBS-NEXT: and t3, t3, s10
+; RV32IMZBS-NEXT: slli a3, a3, 2
+; RV32IMZBS-NEXT: and t4, t4, s10
+; RV32IMZBS-NEXT: slli a4, a4, 2
+; RV32IMZBS-NEXT: and t5, t5, s10
; RV32IMZBS-NEXT: slli a5, a5, 2
-; RV32IMZBS-NEXT: or a5, t4, a5
-; RV32IMZBS-NEXT: srli t4, a7, 2
-; RV32IMZBS-NEXT: and a7, a7, t2
-; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: and t6, t6, s10
; RV32IMZBS-NEXT: slli a6, a6, 2
-; RV32IMZBS-NEXT: and t4, t4, t2
+; RV32IMZBS-NEXT: and s0, s0, s10
; RV32IMZBS-NEXT: slli a7, a7, 2
-; RV32IMZBS-NEXT: or a6, a2, a6
-; RV32IMZBS-NEXT: or t4, t4, a7
-; RV32IMZBS-NEXT: srli a2, a0, 1
-; RV32IMZBS-NEXT: sw t1, 736(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and a0, a0, t1
-; RV32IMZBS-NEXT: srli a7, a4, 1
-; RV32IMZBS-NEXT: and a4, a4, t1
-; RV32IMZBS-NEXT: and a2, a2, t1
+; RV32IMZBS-NEXT: or a2, t0, a2
+; RV32IMZBS-NEXT: or a0, t1, a0
+; RV32IMZBS-NEXT: or a1, t2, a1
+; RV32IMZBS-NEXT: or a3, t3, a3
+; RV32IMZBS-NEXT: or a4, t4, a4
+; RV32IMZBS-NEXT: or a5, t5, a5
+; RV32IMZBS-NEXT: or a6, t6, a6
+; RV32IMZBS-NEXT: or a7, s0, a7
+; RV32IMZBS-NEXT: srli t0, a2, 1
+; RV32IMZBS-NEXT: sw s5, 724(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, s5
+; RV32IMZBS-NEXT: srli t1, a0, 1
+; RV32IMZBS-NEXT: and a0, a0, s5
+; RV32IMZBS-NEXT: srli t2, a1, 1
+; RV32IMZBS-NEXT: and a1, a1, s5
+; RV32IMZBS-NEXT: srli t3, a3, 1
+; RV32IMZBS-NEXT: and a3, a3, s5
+; RV32IMZBS-NEXT: srli t4, a4, 1
+; RV32IMZBS-NEXT: and a4, a4, s5
+; RV32IMZBS-NEXT: srli t5, a5, 1
+; RV32IMZBS-NEXT: and a5, a5, s5
+; RV32IMZBS-NEXT: srli t6, a6, 1
+; RV32IMZBS-NEXT: and a6, a6, s5
+; RV32IMZBS-NEXT: srli s0, a7, 1
+; RV32IMZBS-NEXT: and a7, a7, s5
+; RV32IMZBS-NEXT: and t0, t0, s5
+; RV32IMZBS-NEXT: slli a2, a2, 1
+; RV32IMZBS-NEXT: and t1, t1, s5
; RV32IMZBS-NEXT: slli a0, a0, 1
-; RV32IMZBS-NEXT: or a2, a2, a0
-; RV32IMZBS-NEXT: srli a0, a3, 1
-; RV32IMZBS-NEXT: and a3, a3, t1
-; RV32IMZBS-NEXT: and a7, a7, t1
+; RV32IMZBS-NEXT: and t2, t2, s5
+; RV32IMZBS-NEXT: slli a1, a1, 1
+; RV32IMZBS-NEXT: and t3, t3, s5
+; RV32IMZBS-NEXT: slli s1, a3, 1
+; RV32IMZBS-NEXT: and t4, t4, s5
; RV32IMZBS-NEXT: slli a4, a4, 1
-; RV32IMZBS-NEXT: or a4, a7, a4
-; RV32IMZBS-NEXT: srli a7, a1, 1
-; RV32IMZBS-NEXT: and t5, a1, t1
-; RV32IMZBS-NEXT: and a0, a0, t1
-; RV32IMZBS-NEXT: slli a1, a3, 1
-; RV32IMZBS-NEXT: or a1, a0, a1
-; RV32IMZBS-NEXT: srli a3, t0, 1
-; RV32IMZBS-NEXT: and t0, t0, t1
-; RV32IMZBS-NEXT: and a0, a7, t1
-; RV32IMZBS-NEXT: slli t5, t5, 1
-; RV32IMZBS-NEXT: or a0, a0, t5
-; RV32IMZBS-NEXT: srli a7, a5, 1
-; RV32IMZBS-NEXT: and a5, a5, t1
-; RV32IMZBS-NEXT: and a3, a3, t1
-; RV32IMZBS-NEXT: slli t0, t0, 1
-; RV32IMZBS-NEXT: or a3, a3, t0
-; RV32IMZBS-NEXT: srli t0, a6, 1
-; RV32IMZBS-NEXT: and a6, a6, t1
-; RV32IMZBS-NEXT: and a7, a7, t1
+; RV32IMZBS-NEXT: and t5, t5, s5
; RV32IMZBS-NEXT: slli a5, a5, 1
-; RV32IMZBS-NEXT: or a7, a7, a5
-; RV32IMZBS-NEXT: srli a5, t4, 1
-; RV32IMZBS-NEXT: and t4, t4, t1
-; RV32IMZBS-NEXT: and t0, t0, t1
+; RV32IMZBS-NEXT: and t6, t6, s5
; RV32IMZBS-NEXT: slli a6, a6, 1
-; RV32IMZBS-NEXT: and a5, a5, t1
-; RV32IMZBS-NEXT: slli t4, t4, 1
-; RV32IMZBS-NEXT: or a6, t0, a6
-; RV32IMZBS-NEXT: or a5, a5, t4
-; RV32IMZBS-NEXT: srli t0, a2, 8
-; RV32IMZBS-NEXT: srli t4, a2, 24
-; RV32IMZBS-NEXT: and t0, t0, s10
-; RV32IMZBS-NEXT: or t0, t0, t4
-; RV32IMZBS-NEXT: sw t0, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s3, 1024
-; RV32IMZBS-NEXT: lui s4, 2048
-; RV32IMZBS-NEXT: lui s7, 4096
-; RV32IMZBS-NEXT: lui s11, 8192
-; RV32IMZBS-NEXT: lui ra, 16384
-; RV32IMZBS-NEXT: andi t0, a0, 2
-; RV32IMZBS-NEXT: sw t0, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a0, 1
-; RV32IMZBS-NEXT: sw t0, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a0, 4
-; RV32IMZBS-NEXT: sw t0, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a0, 8
-; RV32IMZBS-NEXT: sw t0, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a0, 16
-; RV32IMZBS-NEXT: sw t0, 616(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a0, 32
-; RV32IMZBS-NEXT: sw t0, 608(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a0, 64
-; RV32IMZBS-NEXT: sw t0, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a0, 128
-; RV32IMZBS-NEXT: sw t0, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a0, 256
-; RV32IMZBS-NEXT: sw t0, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a0, 512
-; RV32IMZBS-NEXT: sw t0, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t0, a0, 1024
-; RV32IMZBS-NEXT: sw t0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw t4, 740(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and t0, a0, t4
-; RV32IMZBS-NEXT: sw t0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s0, 1
-; RV32IMZBS-NEXT: and t0, a0, s0
-; RV32IMZBS-NEXT: sw t0, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s1, 2
-; RV32IMZBS-NEXT: and t0, a0, s1
-; RV32IMZBS-NEXT: sw t0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t1, 4
-; RV32IMZBS-NEXT: and t0, a0, t1
-; RV32IMZBS-NEXT: sw t0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t5, 8
-; RV32IMZBS-NEXT: and t0, a0, t5
-; RV32IMZBS-NEXT: sw t0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s8, 16
-; RV32IMZBS-NEXT: and t0, a0, s8
-; RV32IMZBS-NEXT: sw t0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t6, 32
-; RV32IMZBS-NEXT: and t0, a0, t6
-; RV32IMZBS-NEXT: sw t0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s2, 64
-; RV32IMZBS-NEXT: and t0, a0, s2
-; RV32IMZBS-NEXT: sw t0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t2, 128
-; RV32IMZBS-NEXT: and t0, a0, t2
-; RV32IMZBS-NEXT: sw t0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s5, 256
-; RV32IMZBS-NEXT: and t0, a0, s5
-; RV32IMZBS-NEXT: sw t0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 512
-; RV32IMZBS-NEXT: and t3, a0, t0
-; RV32IMZBS-NEXT: sw t3, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t3, a0, s3
-; RV32IMZBS-NEXT: lui s6, 1024
-; RV32IMZBS-NEXT: sw t3, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t3, a0, s4
-; RV32IMZBS-NEXT: lui s9, 2048
-; RV32IMZBS-NEXT: sw t3, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t3, a0, s7
-; RV32IMZBS-NEXT: lui s10, 4096
-; RV32IMZBS-NEXT: sw t3, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t3, a0, s11
-; RV32IMZBS-NEXT: sw t3, 492(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t3, a0, ra
-; RV32IMZBS-NEXT: sw t3, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t3, 32768
-; RV32IMZBS-NEXT: and t3, a0, t3
-; RV32IMZBS-NEXT: sw t3, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t3, 65536
-; RV32IMZBS-NEXT: and t3, a0, t3
-; RV32IMZBS-NEXT: sw t3, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t3, 131072
-; RV32IMZBS-NEXT: and t3, a0, t3
-; RV32IMZBS-NEXT: sw t3, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t3, 262144
-; RV32IMZBS-NEXT: and t3, a0, t3
-; RV32IMZBS-NEXT: sw t3, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t3, 524288
-; RV32IMZBS-NEXT: and a0, a0, t3
-; RV32IMZBS-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 2
-; RV32IMZBS-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 1
-; RV32IMZBS-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 4
+; RV32IMZBS-NEXT: and s0, s0, s5
+; RV32IMZBS-NEXT: slli a3, a7, 1
+; RV32IMZBS-NEXT: or s6, t0, a2
+; RV32IMZBS-NEXT: or s8, t1, a0
+; RV32IMZBS-NEXT: or s7, t2, a1
+; RV32IMZBS-NEXT: srli a0, a0, 31
+; RV32IMZBS-NEXT: or s9, t3, s1
+; RV32IMZBS-NEXT: srli a1, s1, 31
+; RV32IMZBS-NEXT: or a7, t4, a4
+; RV32IMZBS-NEXT: or t1, t5, a5
+; RV32IMZBS-NEXT: or t0, t6, a6
+; RV32IMZBS-NEXT: srli a2, a5, 31
+; RV32IMZBS-NEXT: or t4, s0, a3
+; RV32IMZBS-NEXT: srli s0, a3, 31
+; RV32IMZBS-NEXT: srli a3, s6, 8
+; RV32IMZBS-NEXT: srli a4, s6, 24
+; RV32IMZBS-NEXT: srli a5, s8, 8
+; RV32IMZBS-NEXT: srli a6, s8, 24
+; RV32IMZBS-NEXT: slli t2, s8, 24
+; RV32IMZBS-NEXT: and t3, s8, ra
+; RV32IMZBS-NEXT: slli t5, s7, 31
+; RV32IMZBS-NEXT: seqz s1, a0
+; RV32IMZBS-NEXT: slli t6, s6, 31
+; RV32IMZBS-NEXT: seqz s2, a1
+; RV32IMZBS-NEXT: srli a0, a7, 8
+; RV32IMZBS-NEXT: and a3, a3, ra
+; RV32IMZBS-NEXT: or a3, a3, a4
+; RV32IMZBS-NEXT: srli a4, a7, 24
+; RV32IMZBS-NEXT: and a1, a5, ra
+; RV32IMZBS-NEXT: or a6, a1, a6
+; RV32IMZBS-NEXT: srli a5, t1, 8
+; RV32IMZBS-NEXT: slli t3, t3, 8
+; RV32IMZBS-NEXT: or a1, t2, t3
+; RV32IMZBS-NEXT: srli t2, t1, 24
+; RV32IMZBS-NEXT: and a0, a0, ra
+; RV32IMZBS-NEXT: or a0, a0, a4
+; RV32IMZBS-NEXT: slli t3, t1, 24
+; RV32IMZBS-NEXT: and a4, a5, ra
+; RV32IMZBS-NEXT: or a4, a4, t2
+; RV32IMZBS-NEXT: and a5, t1, ra
+; RV32IMZBS-NEXT: slli a5, a5, 8
+; RV32IMZBS-NEXT: or a5, t3, a5
+; RV32IMZBS-NEXT: slli t3, t0, 31
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi t2, s1, -1
+; RV32IMZBS-NEXT: addi s2, s2, -1
+; RV32IMZBS-NEXT: and t5, t2, t5
+; RV32IMZBS-NEXT: sw t5, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and t5, s2, t6
+; RV32IMZBS-NEXT: sw t5, 720(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and t2, t2, t6
+; RV32IMZBS-NEXT: sw t2, 712(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t2, a7, 31
+; RV32IMZBS-NEXT: seqz t5, s0
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: and t3, a2, t3
+; RV32IMZBS-NEXT: sw t3, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and t3, t5, t2
+; RV32IMZBS-NEXT: sw t3, 708(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, t2
+; RV32IMZBS-NEXT: sw a2, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: sw ra, 740(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, s6, ra
+; RV32IMZBS-NEXT: slli t2, a2, 8
+; RV32IMZBS-NEXT: slli a2, s6, 24
+; RV32IMZBS-NEXT: or t2, a2, t2
+; RV32IMZBS-NEXT: or a3, t2, a3
+; RV32IMZBS-NEXT: sw a3, 684(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or a1, a1, a6
+; RV32IMZBS-NEXT: sw a1, 688(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a7, ra
+; RV32IMZBS-NEXT: slli a1, a1, 8
+; RV32IMZBS-NEXT: slli s5, a7, 24
+; RV32IMZBS-NEXT: or a1, s5, a1
+; RV32IMZBS-NEXT: or a0, a1, a0
; RV32IMZBS-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 8
-; RV32IMZBS-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 16
-; RV32IMZBS-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 32
-; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 64
-; RV32IMZBS-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 128
-; RV32IMZBS-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 256
-; RV32IMZBS-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 512
-; RV32IMZBS-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a4, 1024
-; RV32IMZBS-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t3, a4, t4
-; RV32IMZBS-NEXT: sw t3, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and ra, a4, s0
-; RV32IMZBS-NEXT: and s7, a4, s1
-; RV32IMZBS-NEXT: and s11, a4, t1
-; RV32IMZBS-NEXT: and s4, a4, t5
-; RV32IMZBS-NEXT: and s3, a4, s8
-; RV32IMZBS-NEXT: and s8, a4, t6
-; RV32IMZBS-NEXT: and s1, a4, s2
-; RV32IMZBS-NEXT: and t6, a4, t2
-; RV32IMZBS-NEXT: and s0, a4, s5
-; RV32IMZBS-NEXT: and t5, a4, t0
-; RV32IMZBS-NEXT: and t4, a4, s6
-; RV32IMZBS-NEXT: and s5, a4, s9
-; RV32IMZBS-NEXT: and t1, a4, s10
-; RV32IMZBS-NEXT: lui a0, 8192
-; RV32IMZBS-NEXT: and t2, a4, a0
-; RV32IMZBS-NEXT: lui a0, 16384
-; RV32IMZBS-NEXT: and t0, a4, a0
-; RV32IMZBS-NEXT: lui a0, 32768
-; RV32IMZBS-NEXT: and s10, a4, a0
-; RV32IMZBS-NEXT: lui a0, 65536
-; RV32IMZBS-NEXT: and s9, a4, a0
-; RV32IMZBS-NEXT: lui a0, 131072
-; RV32IMZBS-NEXT: and s6, a4, a0
-; RV32IMZBS-NEXT: lui a0, 262144
-; RV32IMZBS-NEXT: and a0, a4, a0
-; RV32IMZBS-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and s2, a4, a0
-; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, a0
+; RV32IMZBS-NEXT: or a4, a5, a4
+; RV32IMZBS-NEXT: sw a4, 700(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s8, 2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 2
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, s7, 1
+; RV32IMZBS-NEXT: and s2, a0, a3
+; RV32IMZBS-NEXT: slli a3, s6, 1
+; RV32IMZBS-NEXT: and s1, a1, a3
+; RV32IMZBS-NEXT: and t2, a0, a3
+; RV32IMZBS-NEXT: andi a0, s8, 4
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 4
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, s7, 2
+; RV32IMZBS-NEXT: and s4, a0, a3
+; RV32IMZBS-NEXT: slli a3, s6, 2
+; RV32IMZBS-NEXT: and s3, a1, a3
+; RV32IMZBS-NEXT: and t6, a0, a3
+; RV32IMZBS-NEXT: andi a0, s8, 8
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 8
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, s7, 3
+; RV32IMZBS-NEXT: and a3, a0, a3
+; RV32IMZBS-NEXT: sw a3, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, s6, 3
+; RV32IMZBS-NEXT: and a1, a1, a3
+; RV32IMZBS-NEXT: sw a1, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a3
+; RV32IMZBS-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s8, 16
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 16
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, s7, 4
+; RV32IMZBS-NEXT: and a3, a0, a3
+; RV32IMZBS-NEXT: sw a3, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, s6, 4
+; RV32IMZBS-NEXT: and a1, a1, a3
+; RV32IMZBS-NEXT: sw a1, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a3
+; RV32IMZBS-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s8, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 32
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, s7, 5
+; RV32IMZBS-NEXT: and a3, a0, a3
+; RV32IMZBS-NEXT: sw a3, 568(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, s6, 5
+; RV32IMZBS-NEXT: and a1, a1, a3
+; RV32IMZBS-NEXT: sw a1, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a3
; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t3
-; RV32IMZBS-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, ra
-; RV32IMZBS-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s7
-; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s11
+; RV32IMZBS-NEXT: andi a0, s8, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 64
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, s7, 6
+; RV32IMZBS-NEXT: and a3, a0, a3
+; RV32IMZBS-NEXT: sw a3, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, s6, 6
+; RV32IMZBS-NEXT: and a1, a1, a3
+; RV32IMZBS-NEXT: sw a1, 656(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a3
+; RV32IMZBS-NEXT: sw a0, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s8, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 128
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, s7, 7
+; RV32IMZBS-NEXT: and a3, a0, a3
+; RV32IMZBS-NEXT: sw a3, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, s6, 7
+; RV32IMZBS-NEXT: and a1, a1, a3
+; RV32IMZBS-NEXT: sw a1, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a3
+; RV32IMZBS-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s8, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 256
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, s7, 8
+; RV32IMZBS-NEXT: and a3, a0, a3
+; RV32IMZBS-NEXT: sw a3, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, s6, 8
+; RV32IMZBS-NEXT: and a1, a1, a3
+; RV32IMZBS-NEXT: sw a1, 532(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a3
+; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s8, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 512
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, s7, 9
+; RV32IMZBS-NEXT: and a3, a0, a3
+; RV32IMZBS-NEXT: sw a3, 572(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, s6, 9
+; RV32IMZBS-NEXT: and a1, a1, a3
+; RV32IMZBS-NEXT: sw a1, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a3
; RV32IMZBS-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s4
-; RV32IMZBS-NEXT: sw a0, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s3
+; RV32IMZBS-NEXT: andi a0, s8, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, s9, 1024
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, s7, 10
+; RV32IMZBS-NEXT: and a3, a0, a3
+; RV32IMZBS-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, s6, 10
+; RV32IMZBS-NEXT: and a1, a1, a3
+; RV32IMZBS-NEXT: sw a1, 664(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a3
+; RV32IMZBS-NEXT: sw a0, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a3, s8
+; RV32IMZBS-NEXT: bexti a0, a3, 11
+; RV32IMZBS-NEXT: addi a1, a0, -1
+; RV32IMZBS-NEXT: not a0, s9
+; RV32IMZBS-NEXT: bexti a4, a0, 11
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 11
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 456(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 11
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 468(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 504(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 12
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 12
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 12
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 448(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 12
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 452(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 488(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 13
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 13
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 13
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 508(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 13
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 520(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 14
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 14
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 14
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 588(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 14
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 604(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 636(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 15
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 15
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 15
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 644(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 15
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 668(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 16
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 16
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 16
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 412(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 16
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 420(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 436(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 17
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 17
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 17
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 404(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 17
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 408(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 428(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 18
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 18
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 18
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 440(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 18
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 444(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 476(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 19
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 19
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 19
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 496(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 19
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 512(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 20
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 20
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 20
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 556(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 20
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 560(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 576(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 21
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 21
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 21
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 584(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 21
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 596(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 612(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 22
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 22
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 22
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 380(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 22
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 384(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 392(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 23
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 23
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 23
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 372(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a5, s6, 23
+; RV32IMZBS-NEXT: and a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 376(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a5
+; RV32IMZBS-NEXT: sw a1, 388(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 24
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a4, a0, 24
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli a5, s7, 24
+; RV32IMZBS-NEXT: and a5, a1, a5
+; RV32IMZBS-NEXT: sw a5, 396(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a4, a4, a2
+; RV32IMZBS-NEXT: sw a4, 400(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 416(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 25
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a2, a0, 25
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a4, s7, 25
+; RV32IMZBS-NEXT: and a4, a1, a4
+; RV32IMZBS-NEXT: sw a4, 424(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s6, 25
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 432(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 460(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 26
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a2, a0, 26
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a4, s7, 26
+; RV32IMZBS-NEXT: and a4, a1, a4
+; RV32IMZBS-NEXT: sw a4, 464(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s6, 26
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 472(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 480(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 27
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a2, a0, 27
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a4, s7, 27
+; RV32IMZBS-NEXT: and a4, a1, a4
+; RV32IMZBS-NEXT: sw a4, 484(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s6, 27
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 492(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 500(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 28
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a2, a0, 28
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a4, s7, 28
+; RV32IMZBS-NEXT: and a4, a1, a4
+; RV32IMZBS-NEXT: sw a4, 528(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s6, 28
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 544(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a3, 29
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: bexti a2, a0, 29
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a4, s7, 29
+; RV32IMZBS-NEXT: and a4, a1, a4
+; RV32IMZBS-NEXT: sw a4, 356(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, s6, 29
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 364(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a5, a1, a4
+; RV32IMZBS-NEXT: andi a1, s8, 1
+; RV32IMZBS-NEXT: andi a2, s9, 1
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a4, s7, 30
+; RV32IMZBS-NEXT: and a6, a1, s7
+; RV32IMZBS-NEXT: sw a6, 324(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, s6
+; RV32IMZBS-NEXT: sw a2, 340(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and s7, a1, s6
+; RV32IMZBS-NEXT: slli s6, s6, 30
+; RV32IMZBS-NEXT: bexti a1, a3, 30
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: and a4, a1, a4
+; RV32IMZBS-NEXT: sw a4, 320(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, s6
; RV32IMZBS-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s8
-; RV32IMZBS-NEXT: sw a0, 324(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mv t3, s8
-; RV32IMZBS-NEXT: mul a0, a1, s1
-; RV32IMZBS-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t6
-; RV32IMZBS-NEXT: sw a0, 620(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s0
-; RV32IMZBS-NEXT: sw a0, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t5
-; RV32IMZBS-NEXT: sw a0, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t4
-; RV32IMZBS-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s5
-; RV32IMZBS-NEXT: sw a0, 320(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t1
-; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t2
-; RV32IMZBS-NEXT: sw a0, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, t0
-; RV32IMZBS-NEXT: sw a0, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s10
-; RV32IMZBS-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s9
-; RV32IMZBS-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s6
-; RV32IMZBS-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s8, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a1, s8
-; RV32IMZBS-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a1, s2
-; RV32IMZBS-NEXT: mv a1, s2
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 624(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 616(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 608(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 316(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 616(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s2, a2, a0
-; RV32IMZBS-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 608(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a2, a0
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, ra
-; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s7
-; RV32IMZBS-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s11
-; RV32IMZBS-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s4
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s3
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t3
-; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s1
-; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t6
-; RV32IMZBS-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s0
-; RV32IMZBS-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t5
-; RV32IMZBS-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t4
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s5
-; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t1
-; RV32IMZBS-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t2
-; RV32IMZBS-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, t0
-; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s10
-; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s9
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s6
-; RV32IMZBS-NEXT: sw a0, 684(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, s8
-; RV32IMZBS-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a2, a1
-; RV32IMZBS-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli a0, a2, 24
-; RV32IMZBS-NEXT: lw s10, 744(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a1, a2, s10
-; RV32IMZBS-NEXT: slli a1, a1, 8
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli a0, a4, 8
-; RV32IMZBS-NEXT: and a0, a0, s10
-; RV32IMZBS-NEXT: srli a1, a4, 24
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli a0, a4, 24
-; RV32IMZBS-NEXT: and a1, a4, s10
-; RV32IMZBS-NEXT: slli a1, a1, 8
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 412(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 408(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 400(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 396(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 388(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 392(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 380(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 340(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 388(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 332(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 324(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 384(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 440(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 320(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 380(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 376(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 376(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 368(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 312(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 404(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 364(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 372(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 356(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 352(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: and a0, a1, s6
; RV32IMZBS-NEXT: sw a0, 368(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 344(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 364(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 328(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: andi a0, t1, 2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t4, 2
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 1
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 316(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 1
+; RV32IMZBS-NEXT: and s6, a1, a2
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 360(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 316(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 356(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 304(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s2, a0
-; RV32IMZBS-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 300(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 296(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: andi a0, t1, 4
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t4, 4
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 2
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 292(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 2
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 304(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 348(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: srli a0, a3, 8
-; RV32IMZBS-NEXT: and a0, a0, s10
-; RV32IMZBS-NEXT: srli a1, a3, 24
-; RV32IMZBS-NEXT: or a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 340(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 2
-; RV32IMZBS-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 1
-; RV32IMZBS-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 4
-; RV32IMZBS-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 8
-; RV32IMZBS-NEXT: sw a0, 316(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 16
+; RV32IMZBS-NEXT: andi a0, t1, 8
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t4, 8
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 3
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 268(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 3
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 276(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 32
-; RV32IMZBS-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 64
-; RV32IMZBS-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 128
-; RV32IMZBS-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 256
-; RV32IMZBS-NEXT: sw a0, 292(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 512
-; RV32IMZBS-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a0, a5, 1024
-; RV32IMZBS-NEXT: sw a0, 276(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s10, 740(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, a5, s10
-; RV32IMZBS-NEXT: sw a0, 264(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t6, 1
-; RV32IMZBS-NEXT: and a0, a5, t6
-; RV32IMZBS-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s0, 2
-; RV32IMZBS-NEXT: and a0, a5, s0
-; RV32IMZBS-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s4, 4
-; RV32IMZBS-NEXT: and a0, a5, s4
-; RV32IMZBS-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s5, 8
-; RV32IMZBS-NEXT: and a0, a5, s5
-; RV32IMZBS-NEXT: sw a0, 232(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: and a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 216(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s6, 32
-; RV32IMZBS-NEXT: and a0, a5, s6
-; RV32IMZBS-NEXT: sw a0, 188(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s7, 64
-; RV32IMZBS-NEXT: and a0, a5, s7
-; RV32IMZBS-NEXT: sw a0, 180(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s2, 128
-; RV32IMZBS-NEXT: and a0, a5, s2
-; RV32IMZBS-NEXT: sw a0, 140(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s11, 256
-; RV32IMZBS-NEXT: and a0, a5, s11
-; RV32IMZBS-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 512
-; RV32IMZBS-NEXT: and a0, a5, t0
-; RV32IMZBS-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 1024
-; RV32IMZBS-NEXT: and a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 2048
-; RV32IMZBS-NEXT: and a0, a5, a0
-; RV32IMZBS-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 4096
-; RV32IMZBS-NEXT: and a1, a5, a0
-; RV32IMZBS-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui s3, 8192
-; RV32IMZBS-NEXT: and a1, a5, s3
-; RV32IMZBS-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 16384
-; RV32IMZBS-NEXT: and a1, a5, a1
-; RV32IMZBS-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 32768
-; RV32IMZBS-NEXT: and a1, a5, a1
-; RV32IMZBS-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 65536
-; RV32IMZBS-NEXT: and a1, a5, a1
-; RV32IMZBS-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 131072
-; RV32IMZBS-NEXT: and a1, a5, a1
-; RV32IMZBS-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 262144
-; RV32IMZBS-NEXT: and a1, a5, a1
-; RV32IMZBS-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a1, 524288
-; RV32IMZBS-NEXT: and a1, a5, a1
-; RV32IMZBS-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a2, a7, 2
-; RV32IMZBS-NEXT: sw a2, 32(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a1, a7, 1
-; RV32IMZBS-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a4, a7, 4
-; RV32IMZBS-NEXT: sw a4, 12(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a5, a7, 8
-; RV32IMZBS-NEXT: sw a5, 16(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t1, a7, 16
-; RV32IMZBS-NEXT: sw t1, 20(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t2, a7, 32
-; RV32IMZBS-NEXT: sw t2, 0(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t2, a7, 64
-; RV32IMZBS-NEXT: sw t2, 24(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t3, a7, 128
-; RV32IMZBS-NEXT: sw t3, 28(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi s9, a7, 256
-; RV32IMZBS-NEXT: andi ra, a7, 512
-; RV32IMZBS-NEXT: andi t5, a7, 1024
-; RV32IMZBS-NEXT: and t4, a7, s10
-; RV32IMZBS-NEXT: and s8, a7, t6
-; RV32IMZBS-NEXT: and t6, a7, s0
-; RV32IMZBS-NEXT: sw t6, 300(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s1, a7, s4
-; RV32IMZBS-NEXT: and s4, a7, s5
-; RV32IMZBS-NEXT: lui s0, 16
-; RV32IMZBS-NEXT: and s0, a7, s0
-; RV32IMZBS-NEXT: and s5, a7, s6
-; RV32IMZBS-NEXT: and s6, a7, s7
-; RV32IMZBS-NEXT: and s7, a7, s2
-; RV32IMZBS-NEXT: and s2, a7, s11
-; RV32IMZBS-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and t0, a7, t0
-; RV32IMZBS-NEXT: sw t0, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 1024
-; RV32IMZBS-NEXT: and t0, a7, t0
-; RV32IMZBS-NEXT: sw t0, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui t0, 2048
-; RV32IMZBS-NEXT: and t0, a7, t0
-; RV32IMZBS-NEXT: sw t0, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s2, a7, a0
-; RV32IMZBS-NEXT: and s3, a7, s3
-; RV32IMZBS-NEXT: lui a0, 16384
-; RV32IMZBS-NEXT: and a0, a7, a0
-; RV32IMZBS-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 32768
-; RV32IMZBS-NEXT: and t0, a7, a0
-; RV32IMZBS-NEXT: lui a0, 65536
-; RV32IMZBS-NEXT: and s11, a7, a0
-; RV32IMZBS-NEXT: lui a0, 131072
-; RV32IMZBS-NEXT: and a0, a7, a0
-; RV32IMZBS-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 262144
-; RV32IMZBS-NEXT: and a0, a7, a0
-; RV32IMZBS-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and s10, a7, a0
-; RV32IMZBS-NEXT: mul a0, a6, a2
-; RV32IMZBS-NEXT: sw a0, 196(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, a1
-; RV32IMZBS-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, a4
-; RV32IMZBS-NEXT: sw a0, 244(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, a5
-; RV32IMZBS-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, t1
-; RV32IMZBS-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a1, 0(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a1
-; RV32IMZBS-NEXT: sw a0, 228(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, t2
-; RV32IMZBS-NEXT: sw a0, 152(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, t3
-; RV32IMZBS-NEXT: sw a0, 224(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s9
-; RV32IMZBS-NEXT: sw a0, 220(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, ra
-; RV32IMZBS-NEXT: sw a0, 136(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, t5
-; RV32IMZBS-NEXT: sw a0, 192(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, t4
-; RV32IMZBS-NEXT: sw a0, 212(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s8
-; RV32IMZBS-NEXT: mv a2, s8
-; RV32IMZBS-NEXT: sw a0, 208(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, t6
-; RV32IMZBS-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s1
-; RV32IMZBS-NEXT: mv t6, s1
-; RV32IMZBS-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s4
-; RV32IMZBS-NEXT: mv s1, s4
-; RV32IMZBS-NEXT: sw a0, 328(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s0
-; RV32IMZBS-NEXT: sw a0, 204(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s5
-; RV32IMZBS-NEXT: mv s4, s5
-; RV32IMZBS-NEXT: sw a0, 200(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s6
-; RV32IMZBS-NEXT: mv s5, s6
-; RV32IMZBS-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s7
-; RV32IMZBS-NEXT: mv s6, s7
-; RV32IMZBS-NEXT: sw a0, 88(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s8, 4(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, s8
-; RV32IMZBS-NEXT: sw a0, 324(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 436(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 176(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 172(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s2
+; RV32IMZBS-NEXT: andi a0, t1, 16
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t4, 16
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 4
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 248(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 4
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 256(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 288(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s3
-; RV32IMZBS-NEXT: sw a0, 168(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 320(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, t0
-; RV32IMZBS-NEXT: sw a0, 428(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s11
-; RV32IMZBS-NEXT: sw a0, 440(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 164(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a6, a0
-; RV32IMZBS-NEXT: sw a0, 160(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a6, s10
-; RV32IMZBS-NEXT: mv a6, s10
-; RV32IMZBS-NEXT: sw a0, 280(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 344(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 156(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 332(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 144(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 316(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 132(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 312(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 124(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 304(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 272(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 296(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 120(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 292(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 268(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 108(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
+; RV32IMZBS-NEXT: andi a0, t1, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t4, 32
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 5
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 224(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 5
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 264(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 304(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 316(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 216(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 256(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 296(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 104(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 312(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 336(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s10, a3, a0
-; RV32IMZBS-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 252(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 292(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 308(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 332(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
+; RV32IMZBS-NEXT: andi a0, t1, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t4, 64
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 6
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 296(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 6
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 308(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 344(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 232(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a5, a3, a0
-; RV32IMZBS-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t1, a3, a0
-; RV32IMZBS-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t2, a3, a0
-; RV32IMZBS-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t3, a3, a0
-; RV32IMZBS-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a3, a1
-; RV32IMZBS-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s7, a3, a0
-; RV32IMZBS-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 284(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s9, a3, s9
-; RV32IMZBS-NEXT: mul a0, a3, ra
-; RV32IMZBS-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a3, t5
-; RV32IMZBS-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a3, t4
-; RV32IMZBS-NEXT: sw a0, 216(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a3, a2
-; RV32IMZBS-NEXT: sw a0, 276(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 300(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t6, a3, t6
-; RV32IMZBS-NEXT: mul t4, a3, s1
-; RV32IMZBS-NEXT: mul a0, a3, s0
-; RV32IMZBS-NEXT: sw a0, 188(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a3, s4
-; RV32IMZBS-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a3, s5
-; RV32IMZBS-NEXT: sw a0, 300(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s0, a3, s6
-; RV32IMZBS-NEXT: mul s1, a3, s8
-; RV32IMZBS-NEXT: lw a0, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a3, a0
-; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 140(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s2, a3, s2
-; RV32IMZBS-NEXT: mul s3, a3, s3
-; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul t5, a3, a0
-; RV32IMZBS-NEXT: mul a0, a3, t0
-; RV32IMZBS-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a3, s11
-; RV32IMZBS-NEXT: sw a0, 180(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a3, a6
-; RV32IMZBS-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli s11, a3, 24
-; RV32IMZBS-NEXT: lw a0, 744(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a3, a3, a0
-; RV32IMZBS-NEXT: slli a3, a3, 8
-; RV32IMZBS-NEXT: or a3, s11, a3
-; RV32IMZBS-NEXT: srli s11, a7, 8
-; RV32IMZBS-NEXT: and s11, s11, a0
-; RV32IMZBS-NEXT: srli ra, a7, 24
-; RV32IMZBS-NEXT: or a4, s11, ra
-; RV32IMZBS-NEXT: slli ra, a7, 24
-; RV32IMZBS-NEXT: and a7, a7, a0
-; RV32IMZBS-NEXT: slli a7, a7, 8
-; RV32IMZBS-NEXT: or a6, ra, a7
-; RV32IMZBS-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a0, a1
-; RV32IMZBS-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s4, a0, a1
-; RV32IMZBS-NEXT: lw a0, 236(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s5, a0, a1
-; RV32IMZBS-NEXT: lw a0, 224(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, a0, a1
-; RV32IMZBS-NEXT: lw a0, 212(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, a0, a1
-; RV32IMZBS-NEXT: lw a0, 204(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, a0, a1
-; RV32IMZBS-NEXT: lw a0, 176(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 172(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor ra, a0, a1
-; RV32IMZBS-NEXT: lw a0, 164(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 160(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 156(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 148(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, a1, a0
-; RV32IMZBS-NEXT: lw a0, 144(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 132(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 248(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 244(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 120(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 240(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 236(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 228(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, s10
-; RV32IMZBS-NEXT: sw a0, 224(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 64(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: andi a0, t1, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t4, 128
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 7
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 184(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 7
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
; RV32IMZBS-NEXT: sw a0, 220(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, a1, a0
-; RV32IMZBS-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 476(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: andi a0, t1, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t4, 256
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 8
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 156(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 8
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 176(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 204(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t4, 512
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 9
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 228(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 9
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 260(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, t1, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: andi a1, t4, 1024
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t0, 10
+; RV32IMZBS-NEXT: and a2, a0, a2
+; RV32IMZBS-NEXT: sw a2, 328(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a7, 10
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 336(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 352(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a1, t1
+; RV32IMZBS-NEXT: bexti a0, a1, 11
+; RV32IMZBS-NEXT: addi a2, a0, -1
+; RV32IMZBS-NEXT: not a0, t4
+; RV32IMZBS-NEXT: bexti a3, a0, 11
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 11
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 108(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 11
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 120(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 140(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 12
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 12
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 12
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 92(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 12
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 96(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 132(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 13
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 13
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 13
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 144(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 13
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 152(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 188(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 14
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 14
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 14
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 244(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 14
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 252(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 272(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 15
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 15
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 280(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 15
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 284(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 300(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 16
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 16
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 16
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 56(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 16
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 60(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 17
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 17
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 17
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 44(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 17
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 52(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 64(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 18
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 18
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 18
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 80(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 18
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 84(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 112(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 19
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 19
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 19
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 148(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 19
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 160(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 180(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 20
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 20
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 20
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 196(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 20
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 200(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 208(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 21
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 21
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 21
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 212(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a4, a7, 21
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 216(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, a2, a4
+; RV32IMZBS-NEXT: sw a2, 232(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 22
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: bexti a3, a0, 22
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t0, 22
+; RV32IMZBS-NEXT: and a4, a2, a4
+; RV32IMZBS-NEXT: sw a4, 28(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, a7, 22
+; RV32IMZBS-NEXT: and a4, a3, a6
+; RV32IMZBS-NEXT: and a2, a2, a6
+; RV32IMZBS-NEXT: sw a2, 32(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a2, a1, 23
+; RV32IMZBS-NEXT: addi a6, a2, -1
+; RV32IMZBS-NEXT: bexti a2, a0, 23
+; RV32IMZBS-NEXT: addi a3, a2, -1
+; RV32IMZBS-NEXT: slli a2, t0, 23
+; RV32IMZBS-NEXT: and a2, a6, a2
+; RV32IMZBS-NEXT: sw a2, 24(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli t3, a7, 23
+; RV32IMZBS-NEXT: and a3, a3, t3
+; RV32IMZBS-NEXT: and a6, a6, t3
+; RV32IMZBS-NEXT: bexti t3, a1, 24
+; RV32IMZBS-NEXT: addi s0, t3, -1
+; RV32IMZBS-NEXT: bexti t3, a0, 24
+; RV32IMZBS-NEXT: addi t5, t3, -1
+; RV32IMZBS-NEXT: slli t3, t0, 24
+; RV32IMZBS-NEXT: and a2, s0, t3
+; RV32IMZBS-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, t5, s5
+; RV32IMZBS-NEXT: sw a2, 40(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, s0, s5
+; RV32IMZBS-NEXT: sw a2, 48(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti s5, a1, 25
+; RV32IMZBS-NEXT: addi s5, s5, -1
+; RV32IMZBS-NEXT: bexti s8, a0, 25
+; RV32IMZBS-NEXT: addi s9, s8, -1
+; RV32IMZBS-NEXT: slli s8, t0, 25
+; RV32IMZBS-NEXT: and a2, s5, s8
+; RV32IMZBS-NEXT: sw a2, 72(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s10, a7, 25
+; RV32IMZBS-NEXT: and a2, s9, s10
+; RV32IMZBS-NEXT: sw a2, 76(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, s5, s10
+; RV32IMZBS-NEXT: sw a2, 88(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti s5, a1, 26
+; RV32IMZBS-NEXT: addi s5, s5, -1
+; RV32IMZBS-NEXT: bexti s10, a0, 26
+; RV32IMZBS-NEXT: addi s10, s10, -1
+; RV32IMZBS-NEXT: slli s11, t0, 26
+; RV32IMZBS-NEXT: and a2, s5, s11
+; RV32IMZBS-NEXT: sw a2, 100(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s11, a7, 26
+; RV32IMZBS-NEXT: and a2, s10, s11
+; RV32IMZBS-NEXT: sw a2, 104(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, s5, s11
+; RV32IMZBS-NEXT: sw a2, 116(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti s5, a1, 27
+; RV32IMZBS-NEXT: addi s5, s5, -1
+; RV32IMZBS-NEXT: bexti s10, a0, 27
+; RV32IMZBS-NEXT: addi s10, s10, -1
+; RV32IMZBS-NEXT: slli s11, t0, 27
+; RV32IMZBS-NEXT: and a2, s5, s11
+; RV32IMZBS-NEXT: sw a2, 124(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s11, a7, 27
+; RV32IMZBS-NEXT: and a2, s10, s11
+; RV32IMZBS-NEXT: sw a2, 128(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, s5, s11
+; RV32IMZBS-NEXT: sw a2, 136(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti s5, a1, 28
+; RV32IMZBS-NEXT: addi s5, s5, -1
+; RV32IMZBS-NEXT: bexti s10, a0, 28
+; RV32IMZBS-NEXT: addi s10, s10, -1
+; RV32IMZBS-NEXT: slli s11, t0, 28
+; RV32IMZBS-NEXT: and a2, s5, s11
+; RV32IMZBS-NEXT: sw a2, 168(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli s11, a7, 28
+; RV32IMZBS-NEXT: and a2, s10, s11
+; RV32IMZBS-NEXT: sw a2, 164(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: and a2, s5, s11
+; RV32IMZBS-NEXT: sw a2, 172(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti s5, a1, 29
+; RV32IMZBS-NEXT: addi s5, s5, -1
+; RV32IMZBS-NEXT: bexti s10, a0, 29
+; RV32IMZBS-NEXT: addi s10, s10, -1
+; RV32IMZBS-NEXT: slli s11, t0, 29
+; RV32IMZBS-NEXT: and s11, s5, s11
+; RV32IMZBS-NEXT: slli ra, a7, 29
+; RV32IMZBS-NEXT: and s0, s10, ra
+; RV32IMZBS-NEXT: and s8, s5, ra
+; RV32IMZBS-NEXT: andi t1, t1, 1
+; RV32IMZBS-NEXT: andi t4, t4, 1
+; RV32IMZBS-NEXT: seqz t1, t1
+; RV32IMZBS-NEXT: seqz t4, t4
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: addi t4, t4, -1
+; RV32IMZBS-NEXT: slli ra, t0, 30
+; RV32IMZBS-NEXT: and t0, t1, t0
+; RV32IMZBS-NEXT: and t4, t4, a7
+; RV32IMZBS-NEXT: and a2, t1, a7
+; RV32IMZBS-NEXT: sw a2, 20(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, a7, 30
+; RV32IMZBS-NEXT: bexti a1, a1, 30
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: and ra, a1, ra
+; RV32IMZBS-NEXT: and a0, a0, a7
+; RV32IMZBS-NEXT: and a1, a1, a7
+; RV32IMZBS-NEXT: sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 324(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, s2
+; RV32IMZBS-NEXT: sw a1, 324(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, s4, a1
+; RV32IMZBS-NEXT: sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 568(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 448(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 404(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 456(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 372(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 448(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 356(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 320(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 412(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 340(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, s1
+; RV32IMZBS-NEXT: sw a1, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, s3, a1
+; RV32IMZBS-NEXT: sw a1, 404(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 580(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 464(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 456(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 448(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 444(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 464(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 424(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 460(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a5, t1, a5
-; RV32IMZBS-NEXT: xor t1, t2, t3
-; RV32IMZBS-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, a0, a1
-; RV32IMZBS-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, s9, a0
-; RV32IMZBS-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, a0, t6
-; RV32IMZBS-NEXT: xor s0, s0, s1
-; RV32IMZBS-NEXT: xor s1, s2, s3
-; RV32IMZBS-NEXT: lw a0, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 452(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 452(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a1, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 488(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 416(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 412(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 492(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 408(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 400(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, a0, a1
-; RV32IMZBS-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 396(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, a1, a0
-; RV32IMZBS-NEXT: lw a0, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 392(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 456(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 452(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 408(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 420(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a1, 384(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 380(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 376(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 404(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 372(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 376(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 408(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 332(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 384(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 316(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, t0, a1
+; RV32IMZBS-NEXT: sw a1, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 292(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 268(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 380(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 224(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 376(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 156(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 372(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 92(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 364(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 44(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 356(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a1, 28(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 24(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, a1, a2
+; RV32IMZBS-NEXT: xor s11, s11, ra
+; RV32IMZBS-NEXT: xor ra, t4, s6
+; RV32IMZBS-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 276(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s5, a1, a2
+; RV32IMZBS-NEXT: lw a1, 256(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 236(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, a1, s1
+; RV32IMZBS-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 176(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, a1, a2
+; RV32IMZBS-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 96(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, a1, a2
+; RV32IMZBS-NEXT: lw a1, 60(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 52(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, a1, a2
+; RV32IMZBS-NEXT: xor s6, a4, a3
+; RV32IMZBS-NEXT: xor s0, s0, a0
+; RV32IMZBS-NEXT: xor s7, s7, t2
+; RV32IMZBS-NEXT: lw a0, 672(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t6, a0
+; RV32IMZBS-NEXT: lw a0, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a0, a1
+; RV32IMZBS-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, a0, a1
+; RV32IMZBS-NEXT: lw a0, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 488(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a0, a1
+; RV32IMZBS-NEXT: lw a0, 436(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, a0, a1
+; RV32IMZBS-NEXT: lw a0, 392(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 388(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, a0, a1
+; RV32IMZBS-NEXT: lw a0, 368(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, a5, a0
+; RV32IMZBS-NEXT: lw a0, 360(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 20(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a1, a0
+; RV32IMZBS-NEXT: lw a0, 348(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 312(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 368(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 364(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 360(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 356(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 288(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 264(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: lw a2, 220(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 204(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a3
+; RV32IMZBS-NEXT: lw a3, 140(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 132(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a4
+; RV32IMZBS-NEXT: lw a4, 68(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 64(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, a6
+; RV32IMZBS-NEXT: lw a6, 16(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, s8, a6
+; RV32IMZBS-NEXT: lw s8, 324(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 12(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s8, s9
+; RV32IMZBS-NEXT: sw s8, 568(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 564(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 508(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 440(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 456(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 532(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 396(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 448(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 716(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 412(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 404(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s8, s9
+; RV32IMZBS-NEXT: sw s8, 488(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 468(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 456(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 520(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 452(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 452(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 444(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 420(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 448(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 400(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 408(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 444(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 720(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 384(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 720(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 380(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s8, s9
+; RV32IMZBS-NEXT: sw s8, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 296(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 376(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 656(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 228(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 372(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 144(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 364(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: sw s8, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s8, 80(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 356(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s9, s9, s8
+; RV32IMZBS-NEXT: lw s8, 36(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, s10, s8
+; RV32IMZBS-NEXT: lw s8, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s11, s8
+; RV32IMZBS-NEXT: sw s8, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor s8, ra, s5
+; RV32IMZBS-NEXT: lw s5, 308(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s1, s5
+; RV32IMZBS-NEXT: sw s1, 632(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s1, 240(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s2, s1
+; RV32IMZBS-NEXT: sw s1, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s1, 152(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s3, s1
+; RV32IMZBS-NEXT: sw s1, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s1, 84(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s4, s1
+; RV32IMZBS-NEXT: sw s1, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s6, s1
+; RV32IMZBS-NEXT: sw s1, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw s1, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, s0, s1
+; RV32IMZBS-NEXT: sw s0, 708(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor t0, s7, t0
+; RV32IMZBS-NEXT: sw t0, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t1, t0
+; RV32IMZBS-NEXT: lw t0, 628(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t2, t0
+; RV32IMZBS-NEXT: sw t0, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t3, t0
+; RV32IMZBS-NEXT: sw t0, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 476(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t4, t0
+; RV32IMZBS-NEXT: sw t0, 592(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 416(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t5, t0
+; RV32IMZBS-NEXT: sw t0, 580(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw t0, 712(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t6, t0
+; RV32IMZBS-NEXT: sw t0, 712(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: xor a0, a7, a0
+; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 344(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 352(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 348(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 340(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: or a0, a3, a0
-; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: or a0, a6, a4
-; RV32IMZBS-NEXT: sw a0, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a3, a7, s4
-; RV32IMZBS-NEXT: lw a0, 152(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, s5, a0
-; RV32IMZBS-NEXT: lw a0, 136(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, s6, a0
-; RV32IMZBS-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, s8, a0
-; RV32IMZBS-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 288(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 448(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 280(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 248(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, a0
-; RV32IMZBS-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 444(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 268(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 432(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 264(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 424(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 256(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 420(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 416(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 232(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, s10, a0
-; RV32IMZBS-NEXT: sw a0, 484(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
+; RV32IMZBS-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 188(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a0
+; RV32IMZBS-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a4, a0
; RV32IMZBS-NEXT: sw a0, 508(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 500(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 500(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 464(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 496(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 460(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 480(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, a5, t1
-; RV32IMZBS-NEXT: sw a0, 476(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor s9, t2, s7
-; RV32IMZBS-NEXT: lw a0, 104(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, t3, a0
-; RV32IMZBS-NEXT: xor s11, t6, t4
-; RV32IMZBS-NEXT: xor ra, s0, a2
-; RV32IMZBS-NEXT: xor s5, s1, t5
-; RV32IMZBS-NEXT: xor a0, s2, s3
-; RV32IMZBS-NEXT: sw a0, 472(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 456(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 452(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: sw a0, 468(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, a1, a0
-; RV32IMZBS-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, a1, a0
+; RV32IMZBS-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a5, a0
+; RV32IMZBS-NEXT: sw a0, 504(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a6, a0
+; RV32IMZBS-NEXT: sw a0, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a5, 684(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a4, a5, 4
+; RV32IMZBS-NEXT: lw a1, 744(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a5, a5, a1
+; RV32IMZBS-NEXT: and a4, a4, a1
+; RV32IMZBS-NEXT: slli a5, a5, 4
+; RV32IMZBS-NEXT: or a4, a4, a5
+; RV32IMZBS-NEXT: sw a4, 684(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a5, a0, 4
+; RV32IMZBS-NEXT: and a6, a0, a1
+; RV32IMZBS-NEXT: and a5, a5, a1
+; RV32IMZBS-NEXT: slli a6, a6, 4
+; RV32IMZBS-NEXT: or a0, a5, a6
+; RV32IMZBS-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: sw a0, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a2, 564(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, a2, a0
+; RV32IMZBS-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 496(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 424(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 468(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a2
+; RV32IMZBS-NEXT: sw a0, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 664(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 456(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s11, a2, a0
; RV32IMZBS-NEXT: lw a0, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s0, 560(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 452(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 664(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 448(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 604(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 432(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 444(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a2, a0
+; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 696(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t2, a0, 4
+; RV32IMZBS-NEXT: and t3, a0, a1
+; RV32IMZBS-NEXT: and t2, t2, a1
+; RV32IMZBS-NEXT: slli t3, t3, 4
+; RV32IMZBS-NEXT: or a0, t2, t3
+; RV32IMZBS-NEXT: sw a0, 696(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 700(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t3, a0, 4
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: and t3, t3, a1
+; RV32IMZBS-NEXT: slli a0, a0, 4
+; RV32IMZBS-NEXT: or a0, t3, a0
+; RV32IMZBS-NEXT: sw a0, 700(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 672(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: lw a0, 328(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s5, a1, a0
+; RV32IMZBS-NEXT: lw a0, 244(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, a1, a0
+; RV32IMZBS-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s9, a0
+; RV32IMZBS-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s10, a0
+; RV32IMZBS-NEXT: lw a0, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, s8, a0
+; RV32IMZBS-NEXT: lw a0, 336(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s0, 624(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor s0, s0, a0
-; RV32IMZBS-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s1, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 252(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s1, 620(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor s1, s1, a0
-; RV32IMZBS-NEXT: lw a0, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, a1, a0
-; RV32IMZBS-NEXT: lw a0, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, a1, a0
-; RV32IMZBS-NEXT: xor s4, a3, a4
-; RV32IMZBS-NEXT: lw a0, 192(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, a6, a0
-; RV32IMZBS-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, a7, a0
-; RV32IMZBS-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 544(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 160(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 616(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t4, a1, a0
-; RV32IMZBS-NEXT: lw a0, 168(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 448(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 608(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 444(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, t0, a0
-; RV32IMZBS-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 432(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, a0, t1
+; RV32IMZBS-NEXT: lw a0, 680(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, a1, a0
+; RV32IMZBS-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 628(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, a1, a0
+; RV32IMZBS-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 592(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a1, a0
+; RV32IMZBS-NEXT: lw a0, 460(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, a1, a0
+; RV32IMZBS-NEXT: lw a0, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a0, a1
+; RV32IMZBS-NEXT: lw a0, 352(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 520(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a0
+; RV32IMZBS-NEXT: lw a0, 272(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a3, a0
+; RV32IMZBS-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 508(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a1, a0
+; RV32IMZBS-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 504(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a0
+; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor ra, a0, ra
+; RV32IMZBS-NEXT: lw a0, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 588(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a1, a0
-; RV32IMZBS-NEXT: lw a1, 304(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 424(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 556(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 564(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: lw a2, 296(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a3, 420(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: lw a3, 292(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 416(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: lw a4, 484(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a5, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: lw a5, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a6, 508(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a6, a5
-; RV32IMZBS-NEXT: lw a6, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a7, 500(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a7, a6
-; RV32IMZBS-NEXT: lw a7, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw t0, 496(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, t0, a7
-; RV32IMZBS-NEXT: lw t0, 504(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw t1, 480(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t1, t0
-; RV32IMZBS-NEXT: lw t1, 476(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s9, t1, s9
-; RV32IMZBS-NEXT: lw t1, 216(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, s10, t1
-; RV32IMZBS-NEXT: lw t1, 188(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s11, s11, t1
-; RV32IMZBS-NEXT: lw t1, 140(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor ra, ra, t1
-; RV32IMZBS-NEXT: lw t1, 76(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, s5, t1
-; RV32IMZBS-NEXT: lw s5, 472(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 632(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s5, s5, s6
-; RV32IMZBS-NEXT: sw s5, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw s5, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw s6, 468(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, s6, s5
-; RV32IMZBS-NEXT: lw s5, 644(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, s7, s5
-; RV32IMZBS-NEXT: lw s5, 636(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s8, s8, s5
-; RV32IMZBS-NEXT: lw s5, 628(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s0, s5, s0
-; RV32IMZBS-NEXT: lw s5, 624(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s1, s5
-; RV32IMZBS-NEXT: lw s5, 616(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, s2, s5
-; RV32IMZBS-NEXT: lw s5, 608(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s3, s3, s5
-; RV32IMZBS-NEXT: xor t2, s4, t2
-; RV32IMZBS-NEXT: lw s4, 328(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t3, s4
-; RV32IMZBS-NEXT: lw s4, 324(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s4, t4, s4
-; RV32IMZBS-NEXT: lw t4, 320(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, t5, t4
-; RV32IMZBS-NEXT: xor t6, t6, a0
-; RV32IMZBS-NEXT: lw a0, 316(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t5, a1, a0
-; RV32IMZBS-NEXT: lw a0, 312(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a0
-; RV32IMZBS-NEXT: lw a0, 308(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a0
-; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a0
-; RV32IMZBS-NEXT: lw a0, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a0
-; RV32IMZBS-NEXT: lw a0, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, a0
-; RV32IMZBS-NEXT: lw a0, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a7, a0
-; RV32IMZBS-NEXT: sw a0, 688(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, a0
-; RV32IMZBS-NEXT: lw a0, 284(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, s9, a0
-; RV32IMZBS-NEXT: lw a0, 276(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s9, s10, a0
-; RV32IMZBS-NEXT: lw a0, 260(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s10, s11, a0
-; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 464(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s8, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, s8, a2
+; RV32IMZBS-NEXT: lw s8, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s11, s8, s11
+; RV32IMZBS-NEXT: lw s8, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s9, 664(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s8, s9, s8
+; RV32IMZBS-NEXT: lw s9, 560(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s10, 604(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s9, s10, s9
+; RV32IMZBS-NEXT: lw s10, 472(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw s7, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s10, s7, s10
+; RV32IMZBS-NEXT: lw s7, 672(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s5, s7, s5
+; RV32IMZBS-NEXT: lw s7, 280(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s6, s6, s7
+; RV32IMZBS-NEXT: lw s7, 196(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s2, s7
+; RV32IMZBS-NEXT: lw s7, 100(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, s7
+; RV32IMZBS-NEXT: xor s0, s4, s0
+; RV32IMZBS-NEXT: lw s4, 284(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, s1, s4
+; RV32IMZBS-NEXT: lw s4, 200(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, s4
+; RV32IMZBS-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, s4
+; RV32IMZBS-NEXT: xor t2, t6, t2
+; RV32IMZBS-NEXT: lw t6, 668(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, t3, t6
+; RV32IMZBS-NEXT: lw t6, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t6
+; RV32IMZBS-NEXT: lw t6, 480(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t6
+; RV32IMZBS-NEXT: xor a4, t1, a4
+; RV32IMZBS-NEXT: lw t1, 300(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a5, t1
+; RV32IMZBS-NEXT: lw t1, 208(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a6, t1
+; RV32IMZBS-NEXT: lw t1, 116(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, t1
; RV32IMZBS-NEXT: xor a0, ra, a0
-; RV32IMZBS-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 180(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, a0
-; RV32IMZBS-NEXT: lw a0, 488(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s11, a0, 4
-; RV32IMZBS-NEXT: lw a1, 728(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and ra, a0, a1
-; RV32IMZBS-NEXT: and s11, s11, a1
-; RV32IMZBS-NEXT: slli ra, ra, 4
-; RV32IMZBS-NEXT: or s11, s11, ra
-; RV32IMZBS-NEXT: lw a0, 492(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli ra, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, a1
-; RV32IMZBS-NEXT: and ra, ra, a1
-; RV32IMZBS-NEXT: slli a0, a0, 4
-; RV32IMZBS-NEXT: or a0, ra, a0
-; RV32IMZBS-NEXT: lw s5, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t1, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, t1
+; RV32IMZBS-NEXT: lw t1, 484(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, t1
+; RV32IMZBS-NEXT: xor t1, s11, s8
+; RV32IMZBS-NEXT: lw t6, 596(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t6, s9, t6
+; RV32IMZBS-NEXT: lw s4, 492(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s4, s10, s4
; RV32IMZBS-NEXT: xor s5, s5, s6
-; RV32IMZBS-NEXT: lw s6, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s6, s7, s6
-; RV32IMZBS-NEXT: lw s7, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s7, s8, s7
-; RV32IMZBS-NEXT: xor s0, s0, s1
-; RV32IMZBS-NEXT: lw s1, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s2, s1
-; RV32IMZBS-NEXT: lw s2, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s2, s3, s2
-; RV32IMZBS-NEXT: lw s8, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s3, s8, 4
-; RV32IMZBS-NEXT: and s8, s8, a1
-; RV32IMZBS-NEXT: and s3, s3, a1
-; RV32IMZBS-NEXT: slli s8, s8, 4
-; RV32IMZBS-NEXT: or s3, s3, s8
-; RV32IMZBS-NEXT: lw ra, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s8, ra, 4
-; RV32IMZBS-NEXT: and ra, ra, a1
-; RV32IMZBS-NEXT: and s8, s8, a1
-; RV32IMZBS-NEXT: slli ra, ra, 4
-; RV32IMZBS-NEXT: or s8, s8, ra
-; RV32IMZBS-NEXT: xor t2, t2, t3
-; RV32IMZBS-NEXT: lw a1, 436(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, s4, a1
-; RV32IMZBS-NEXT: lw a1, 428(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t4, t4, a1
-; RV32IMZBS-NEXT: xor a1, t6, t5
-; RV32IMZBS-NEXT: lw t5, 336(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t5
-; RV32IMZBS-NEXT: lw t5, 332(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, t5
-; RV32IMZBS-NEXT: lw t5, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, t5
-; RV32IMZBS-NEXT: lw t5, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, t5
-; RV32IMZBS-NEXT: lw t5, 300(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t6, s10, t5
-; RV32IMZBS-NEXT: lw t5, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, t5
-; RV32IMZBS-NEXT: xor s4, s5, s6
-; RV32IMZBS-NEXT: lw t5, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s5, s7, t5
-; RV32IMZBS-NEXT: xor s0, s0, s1
-; RV32IMZBS-NEXT: lw t5, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor s1, s2, t5
-; RV32IMZBS-NEXT: xor t2, t2, t3
-; RV32IMZBS-NEXT: lw t3, 440(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t4, t3
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 344(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: xor a5, a4, a5
-; RV32IMZBS-NEXT: xor a3, a5, a6
-; RV32IMZBS-NEXT: lw a5, 692(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, t0, a5
-; RV32IMZBS-NEXT: xor a6, a7, s9
-; RV32IMZBS-NEXT: xor a6, a6, t6
-; RV32IMZBS-NEXT: lw t0, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t1, t0
-; RV32IMZBS-NEXT: xor t1, s4, s5
+; RV32IMZBS-NEXT: lw s6, 212(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, s2, s6
+; RV32IMZBS-NEXT: lw s6, 124(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, s6
; RV32IMZBS-NEXT: xor s0, s0, s1
+; RV32IMZBS-NEXT: lw s1, 216(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, t4, s1
+; RV32IMZBS-NEXT: lw s1, 128(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, s1
; RV32IMZBS-NEXT: xor t2, t2, t3
-; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 688(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: lw a3, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a5, a3
-; RV32IMZBS-NEXT: lw a5, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t3, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t3
+; RV32IMZBS-NEXT: lw t3, 500(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t3
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a5, 232(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a5, a6, a5
-; RV32IMZBS-NEXT: lw a6, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, t0, a6
-; RV32IMZBS-NEXT: srli t0, s11, 2
-; RV32IMZBS-NEXT: lw s1, 732(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and t3, s11, s1
-; RV32IMZBS-NEXT: and t0, t0, s1
+; RV32IMZBS-NEXT: lw a6, 136(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a6
+; RV32IMZBS-NEXT: lw t3, 684(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli a6, t3, 2
+; RV32IMZBS-NEXT: lw s6, 728(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and t3, t3, s6
+; RV32IMZBS-NEXT: and a6, a6, s6
; RV32IMZBS-NEXT: slli t3, t3, 2
-; RV32IMZBS-NEXT: or t0, t0, t3
-; RV32IMZBS-NEXT: srli t3, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, s1
-; RV32IMZBS-NEXT: and t3, t3, s1
-; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: or t3, t3, a0
-; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, t1, a0
-; RV32IMZBS-NEXT: lw t1, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, s0, t1
-; RV32IMZBS-NEXT: srli t4, s3, 2
-; RV32IMZBS-NEXT: and t5, s3, s1
-; RV32IMZBS-NEXT: and t4, t4, s1
-; RV32IMZBS-NEXT: slli t5, t5, 2
-; RV32IMZBS-NEXT: or t4, t4, t5
-; RV32IMZBS-NEXT: srli t5, s8, 2
-; RV32IMZBS-NEXT: and t6, s8, s1
-; RV32IMZBS-NEXT: and t5, t5, s1
-; RV32IMZBS-NEXT: slli t6, t6, 2
-; RV32IMZBS-NEXT: or t6, t5, t6
-; RV32IMZBS-NEXT: lw t5, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t2, t2, t5
-; RV32IMZBS-NEXT: lw t5, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, t5
-; RV32IMZBS-NEXT: xor a0, t1, a0
-; RV32IMZBS-NEXT: sw a0, 724(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: xor a0, a1, t2
-; RV32IMZBS-NEXT: sw a0, 720(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: slli a4, a4, 24
-; RV32IMZBS-NEXT: lw t1, 744(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a0, a2, t1
-; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a4, a4, a0
-; RV32IMZBS-NEXT: xor a3, a2, a3
-; RV32IMZBS-NEXT: srli a2, a2, 8
-; RV32IMZBS-NEXT: and a0, a2, t1
-; RV32IMZBS-NEXT: srli a3, a3, 24
-; RV32IMZBS-NEXT: or a3, a0, a3
-; RV32IMZBS-NEXT: slli a7, a7, 24
-; RV32IMZBS-NEXT: and a0, a5, t1
-; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a1, a7, a0
-; RV32IMZBS-NEXT: xor a0, a5, a6
-; RV32IMZBS-NEXT: srli a5, a5, 8
-; RV32IMZBS-NEXT: and a2, a5, t1
-; RV32IMZBS-NEXT: srli a0, a0, 24
-; RV32IMZBS-NEXT: or a2, a2, a0
-; RV32IMZBS-NEXT: srli a0, t0, 1
-; RV32IMZBS-NEXT: lw t1, 736(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a5, t0, t1
-; RV32IMZBS-NEXT: and a0, a0, t1
+; RV32IMZBS-NEXT: or a6, a6, t3
+; RV32IMZBS-NEXT: lw s1, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t3, s1, 2
+; RV32IMZBS-NEXT: and s1, s1, s6
+; RV32IMZBS-NEXT: and t3, t3, s6
+; RV32IMZBS-NEXT: slli s1, s1, 2
+; RV32IMZBS-NEXT: or t3, t3, s1
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: lw a1, 528(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a2, a1
+; RV32IMZBS-NEXT: xor a2, t1, t6
+; RV32IMZBS-NEXT: lw t1, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, s4, t1
+; RV32IMZBS-NEXT: lw s1, 696(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t6, s1, 2
+; RV32IMZBS-NEXT: and s1, s1, s6
+; RV32IMZBS-NEXT: and t6, t6, s6
+; RV32IMZBS-NEXT: slli s1, s1, 2
+; RV32IMZBS-NEXT: or t6, t6, s1
+; RV32IMZBS-NEXT: lw s4, 700(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli s1, s4, 2
+; RV32IMZBS-NEXT: and s4, s4, s6
+; RV32IMZBS-NEXT: and s1, s1, s6
+; RV32IMZBS-NEXT: slli s4, s4, 2
+; RV32IMZBS-NEXT: or s1, s1, s4
+; RV32IMZBS-NEXT: xor s2, s5, s2
+; RV32IMZBS-NEXT: lw s4, 168(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s3, s3, s4
+; RV32IMZBS-NEXT: xor t4, s0, t4
+; RV32IMZBS-NEXT: lw s0, 164(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t5, t5, s0
+; RV32IMZBS-NEXT: xor a7, t2, a7
+; RV32IMZBS-NEXT: lw t2, 544(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t2
+; RV32IMZBS-NEXT: xor a4, a4, a5
+; RV32IMZBS-NEXT: lw a5, 172(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a5
+; RV32IMZBS-NEXT: xor a0, a0, a1
+; RV32IMZBS-NEXT: xor a1, a2, t1
+; RV32IMZBS-NEXT: srli a2, a6, 1
+; RV32IMZBS-NEXT: lw s4, 724(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a5, a6, s4
+; RV32IMZBS-NEXT: xor a6, s2, s3
+; RV32IMZBS-NEXT: srli t1, t3, 1
+; RV32IMZBS-NEXT: and t2, t3, s4
+; RV32IMZBS-NEXT: xor t3, t4, t5
+; RV32IMZBS-NEXT: srli t4, t6, 1
+; RV32IMZBS-NEXT: and t5, t6, s4
+; RV32IMZBS-NEXT: xor a7, a7, t0
+; RV32IMZBS-NEXT: srli t0, s1, 1
+; RV32IMZBS-NEXT: and t6, s1, s4
+; RV32IMZBS-NEXT: xor a3, a4, a3
+; RV32IMZBS-NEXT: and a2, a2, s4
; RV32IMZBS-NEXT: slli a5, a5, 1
-; RV32IMZBS-NEXT: or a0, a0, a5
-; RV32IMZBS-NEXT: srli a5, t3, 1
-; RV32IMZBS-NEXT: and a6, t3, t1
-; RV32IMZBS-NEXT: and a5, a5, t1
-; RV32IMZBS-NEXT: slli a6, a6, 1
-; RV32IMZBS-NEXT: or a5, a5, a6
-; RV32IMZBS-NEXT: srli a6, t4, 1
-; RV32IMZBS-NEXT: and a7, t4, t1
-; RV32IMZBS-NEXT: and a6, a6, t1
-; RV32IMZBS-NEXT: slli a7, a7, 1
-; RV32IMZBS-NEXT: or t5, a6, a7
-; RV32IMZBS-NEXT: srli a6, t6, 1
-; RV32IMZBS-NEXT: and a7, t6, t1
-; RV32IMZBS-NEXT: and a6, a6, t1
-; RV32IMZBS-NEXT: slli a7, a7, 1
-; RV32IMZBS-NEXT: or s11, a6, a7
-; RV32IMZBS-NEXT: or a3, a4, a3
-; RV32IMZBS-NEXT: sw a3, 704(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: or a1, a1, a2
-; RV32IMZBS-NEXT: sw a1, 716(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a1, a5, 2
-; RV32IMZBS-NEXT: sw a1, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a1, a5, 1
-; RV32IMZBS-NEXT: sw a1, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi a3, a5, 4
-; RV32IMZBS-NEXT: andi a4, a5, 8
-; RV32IMZBS-NEXT: andi a6, a5, 16
-; RV32IMZBS-NEXT: andi a7, a5, 32
-; RV32IMZBS-NEXT: andi t0, a5, 64
-; RV32IMZBS-NEXT: andi t1, a5, 128
-; RV32IMZBS-NEXT: andi t2, a5, 256
-; RV32IMZBS-NEXT: andi t3, a5, 512
-; RV32IMZBS-NEXT: andi t4, a5, 1024
-; RV32IMZBS-NEXT: lw a1, 740(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and s2, a5, a1
-; RV32IMZBS-NEXT: lui t6, 1
-; RV32IMZBS-NEXT: and s3, a5, t6
-; RV32IMZBS-NEXT: lui t6, 2
-; RV32IMZBS-NEXT: and s4, a5, t6
-; RV32IMZBS-NEXT: lui t6, 4
-; RV32IMZBS-NEXT: and s7, a5, t6
-; RV32IMZBS-NEXT: lui t6, 8
-; RV32IMZBS-NEXT: and s8, a5, t6
-; RV32IMZBS-NEXT: lui t6, 16
-; RV32IMZBS-NEXT: and s10, a5, t6
-; RV32IMZBS-NEXT: lui t6, 32
-; RV32IMZBS-NEXT: and t6, a5, t6
-; RV32IMZBS-NEXT: lui s0, 64
-; RV32IMZBS-NEXT: and s0, a5, s0
-; RV32IMZBS-NEXT: lui s1, 128
-; RV32IMZBS-NEXT: and s1, a5, s1
-; RV32IMZBS-NEXT: lui s5, 256
-; RV32IMZBS-NEXT: and s5, a5, s5
-; RV32IMZBS-NEXT: lui a2, 512
-; RV32IMZBS-NEXT: and s6, a5, a2
-; RV32IMZBS-NEXT: lui s9, 1024
-; RV32IMZBS-NEXT: and s9, a5, s9
-; RV32IMZBS-NEXT: lui ra, 2048
-; RV32IMZBS-NEXT: and ra, a5, ra
-; RV32IMZBS-NEXT: lui a2, 4096
-; RV32IMZBS-NEXT: and a2, a5, a2
+; RV32IMZBS-NEXT: and a4, t1, s4
+; RV32IMZBS-NEXT: slli t2, t2, 1
+; RV32IMZBS-NEXT: lw t1, 716(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, a0, t1
+; RV32IMZBS-NEXT: lw s0, 720(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s0, a1, s0
+; RV32IMZBS-NEXT: and t4, t4, s4
+; RV32IMZBS-NEXT: slli t5, t5, 1
+; RV32IMZBS-NEXT: and t0, t0, s4
+; RV32IMZBS-NEXT: slli t6, t6, 1
+; RV32IMZBS-NEXT: lw a0, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s1, a6, a0
+; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t3, t3, a0
+; RV32IMZBS-NEXT: lw a0, 712(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, a0
+; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a0
+; RV32IMZBS-NEXT: or a0, a2, a5
+; RV32IMZBS-NEXT: or a1, a4, t2
+; RV32IMZBS-NEXT: srli a2, t2, 31
+; RV32IMZBS-NEXT: xor a4, s0, t1
+; RV32IMZBS-NEXT: sw a4, 720(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: or t2, t4, t5
+; RV32IMZBS-NEXT: or s2, t0, t6
+; RV32IMZBS-NEXT: srli a4, t6, 31
+; RV32IMZBS-NEXT: xor a5, t3, s1
+; RV32IMZBS-NEXT: sw a5, 716(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: srli a5, a7, 8
+; RV32IMZBS-NEXT: srli t0, a7, 24
+; RV32IMZBS-NEXT: slli a6, a7, 24
+; RV32IMZBS-NEXT: lw s5, 740(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a7, a7, s5
+; RV32IMZBS-NEXT: srli t3, a3, 8
+; RV32IMZBS-NEXT: srli t4, a3, 24
+; RV32IMZBS-NEXT: slli t5, a3, 24
+; RV32IMZBS-NEXT: and a3, a3, s5
+; RV32IMZBS-NEXT: slli t6, a0, 1
+; RV32IMZBS-NEXT: andi s0, a1, 2
+; RV32IMZBS-NEXT: slli s1, a0, 2
+; RV32IMZBS-NEXT: andi t1, a1, 4
+; RV32IMZBS-NEXT: slli s3, a0, 3
+; RV32IMZBS-NEXT: and a5, a5, s5
+; RV32IMZBS-NEXT: or a5, a5, t0
+; RV32IMZBS-NEXT: andi t0, a1, 8
+; RV32IMZBS-NEXT: slli a7, a7, 8
+; RV32IMZBS-NEXT: or a7, a6, a7
+; RV32IMZBS-NEXT: slli a6, a0, 4
+; RV32IMZBS-NEXT: and t3, t3, s5
+; RV32IMZBS-NEXT: or t3, t3, t4
+; RV32IMZBS-NEXT: andi t4, a1, 16
+; RV32IMZBS-NEXT: slli a3, a3, 8
+; RV32IMZBS-NEXT: or a3, t5, a3
+; RV32IMZBS-NEXT: slli t5, a0, 31
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a2, a2, t5
; RV32IMZBS-NEXT: sw a2, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 8192
-; RV32IMZBS-NEXT: and a2, a5, a2
-; RV32IMZBS-NEXT: sw a2, 692(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 16384
-; RV32IMZBS-NEXT: and a2, a5, a2
-; RV32IMZBS-NEXT: sw a2, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 32768
-; RV32IMZBS-NEXT: and a2, a5, a2
-; RV32IMZBS-NEXT: sw a2, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 65536
-; RV32IMZBS-NEXT: and a2, a5, a2
-; RV32IMZBS-NEXT: sw a2, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 131072
-; RV32IMZBS-NEXT: and a2, a5, a2
-; RV32IMZBS-NEXT: sw a2, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 262144
-; RV32IMZBS-NEXT: and a2, a5, a2
-; RV32IMZBS-NEXT: sw a2, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a2, 524288
-; RV32IMZBS-NEXT: and a5, a5, a2
-; RV32IMZBS-NEXT: lw a2, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a0, a2
-; RV32IMZBS-NEXT: sw a2, 628(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a0, a2
-; RV32IMZBS-NEXT: sw a2, 624(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a0, a3
-; RV32IMZBS-NEXT: sw a2, 620(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a0, a4
-; RV32IMZBS-NEXT: sw a2, 616(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a0, a6
-; RV32IMZBS-NEXT: sw a2, 612(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a0, a7
-; RV32IMZBS-NEXT: sw a2, 608(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, t0
-; RV32IMZBS-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, t1
-; RV32IMZBS-NEXT: sw a3, 696(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a0, t2
-; RV32IMZBS-NEXT: sw a2, 604(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a0, t3
-; RV32IMZBS-NEXT: sw a2, 600(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, t4
-; RV32IMZBS-NEXT: sw a3, 648(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, s2
-; RV32IMZBS-NEXT: sw a3, 672(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, s3
-; RV32IMZBS-NEXT: sw a3, 688(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a0, s4
-; RV32IMZBS-NEXT: sw a2, 596(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a0, s7
-; RV32IMZBS-NEXT: sw a2, 592(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, s8
-; RV32IMZBS-NEXT: sw a3, 640(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, s10
-; RV32IMZBS-NEXT: sw a3, 664(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, t6
-; RV32IMZBS-NEXT: sw a3, 684(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, s0
+; RV32IMZBS-NEXT: slli a2, t2, 31
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a2, a4, a2
+; RV32IMZBS-NEXT: sw a2, 708(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a2, a0, 5
+; RV32IMZBS-NEXT: or a4, a7, a5
+; RV32IMZBS-NEXT: sw a4, 704(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a4, a1, 32
+; RV32IMZBS-NEXT: or a3, a3, t3
; RV32IMZBS-NEXT: sw a3, 712(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a0, s1
-; RV32IMZBS-NEXT: sw a2, 588(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a2, a0, s5
-; RV32IMZBS-NEXT: sw a2, 584(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, s6
+; RV32IMZBS-NEXT: slli a3, a0, 6
+; RV32IMZBS-NEXT: seqz a5, s0
+; RV32IMZBS-NEXT: addi a5, a5, -1
+; RV32IMZBS-NEXT: and a5, a5, t6
+; RV32IMZBS-NEXT: sw a5, 692(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a5, a1, 64
+; RV32IMZBS-NEXT: seqz a7, t1
+; RV32IMZBS-NEXT: addi a7, a7, -1
+; RV32IMZBS-NEXT: and a7, a7, s1
+; RV32IMZBS-NEXT: sw a7, 688(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a7, a0, 7
+; RV32IMZBS-NEXT: seqz t0, t0
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: and t0, t0, s3
+; RV32IMZBS-NEXT: sw t0, 676(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi t0, a1, 128
+; RV32IMZBS-NEXT: seqz t3, t4
+; RV32IMZBS-NEXT: addi t3, t3, -1
+; RV32IMZBS-NEXT: and a6, t3, a6
+; RV32IMZBS-NEXT: sw a6, 672(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a6, a0, 8
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a2, a4, a2
+; RV32IMZBS-NEXT: sw a2, 664(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a2, a1, 256
+; RV32IMZBS-NEXT: seqz a4, a5
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a3, a4, a3
+; RV32IMZBS-NEXT: sw a3, 684(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a3, a0, 9
+; RV32IMZBS-NEXT: seqz a4, t0
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a5, a4, a7
+; RV32IMZBS-NEXT: andi a4, a1, 512
+; RV32IMZBS-NEXT: seqz a2, a2
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: and a7, a2, a6
+; RV32IMZBS-NEXT: slli a2, a0, 10
+; RV32IMZBS-NEXT: seqz a4, a4
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: and a3, a4, a3
+; RV32IMZBS-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a3, a1, 1024
+; RV32IMZBS-NEXT: seqz a3, a3
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: and a2, a3, a2
+; RV32IMZBS-NEXT: sw a2, 696(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a2, a1
+; RV32IMZBS-NEXT: bexti a3, a2, 11
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 11
+; RV32IMZBS-NEXT: and a3, a3, a4
; RV32IMZBS-NEXT: sw a3, 636(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, s9
-; RV32IMZBS-NEXT: sw a3, 652(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a3, a0, ra
-; RV32IMZBS-NEXT: sw a3, 668(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a0, a2
-; RV32IMZBS-NEXT: sw a2, 576(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 692(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a2, a0, a2
-; RV32IMZBS-NEXT: sw a2, 572(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a0, a2
+; RV32IMZBS-NEXT: bexti a3, a2, 12
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 12
+; RV32IMZBS-NEXT: and a3, a3, a4
; RV32IMZBS-NEXT: sw a3, 632(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 644(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a0, a2
+; RV32IMZBS-NEXT: bexti a3, a2, 13
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 13
+; RV32IMZBS-NEXT: and a3, a3, a4
; RV32IMZBS-NEXT: sw a3, 644(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a0, a2
-; RV32IMZBS-NEXT: sw a3, 660(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a0, a2
-; RV32IMZBS-NEXT: sw a3, 692(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a2, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a3, a0, a2
-; RV32IMZBS-NEXT: sw a3, 700(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, a0, a5
-; RV32IMZBS-NEXT: sw a0, 708(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: and s4, s11, a1
-; RV32IMZBS-NEXT: lui a0, 1
-; RV32IMZBS-NEXT: and a7, s11, a0
-; RV32IMZBS-NEXT: lui a0, 2
-; RV32IMZBS-NEXT: and t0, s11, a0
-; RV32IMZBS-NEXT: lui a0, 4
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 740(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 8
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: bexti a3, a2, 14
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 14
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 668(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 15
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 15
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 680(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 16
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 16
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 612(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 17
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 17
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 608(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 18
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 18
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 620(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 19
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 19
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 648(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 20
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 20
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 652(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 21
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 21
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 656(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 22
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 22
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 600(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 23
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 23
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 596(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 24
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 24
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 604(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 25
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 25
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 616(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 26
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 26
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 624(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 27
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 27
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 628(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 28
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 28
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 640(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a3, a2, 29
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, a0, 29
+; RV32IMZBS-NEXT: and a3, a3, a4
+; RV32IMZBS-NEXT: sw a3, 592(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a1, a1, 1
+; RV32IMZBS-NEXT: seqz a1, a1
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a1, a1, a0
+; RV32IMZBS-NEXT: sw a1, 584(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: slli a0, a0, 30
+; RV32IMZBS-NEXT: bexti a1, a2, 30
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: and a0, a1, a0
+; RV32IMZBS-NEXT: sw a0, 588(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s2, 2
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t2, 1
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 16
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, s2, 4
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t2, 2
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 576(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s2, 8
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t2, 3
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 32
-; RV32IMZBS-NEXT: and a0, s11, a0
+; RV32IMZBS-NEXT: andi a0, s2, 16
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t2, 4
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s2, 32
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t2, 5
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 64
-; RV32IMZBS-NEXT: and s0, s11, a0
-; RV32IMZBS-NEXT: lui a0, 128
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 256
-; RV32IMZBS-NEXT: and s5, s11, a0
-; RV32IMZBS-NEXT: lui a0, 512
-; RV32IMZBS-NEXT: and s6, s11, a0
-; RV32IMZBS-NEXT: lui a0, 1024
-; RV32IMZBS-NEXT: and s7, s11, a0
-; RV32IMZBS-NEXT: lui a0, 2048
-; RV32IMZBS-NEXT: and s8, s11, a0
-; RV32IMZBS-NEXT: lui a0, 4096
-; RV32IMZBS-NEXT: and s9, s11, a0
-; RV32IMZBS-NEXT: lui a0, 8192
-; RV32IMZBS-NEXT: and s10, s11, a0
-; RV32IMZBS-NEXT: lui a0, 16384
-; RV32IMZBS-NEXT: and ra, s11, a0
-; RV32IMZBS-NEXT: lui a0, 32768
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 528(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 65536
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 524(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 131072
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 520(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 262144
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 516(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lui a0, 524288
-; RV32IMZBS-NEXT: and a0, s11, a0
-; RV32IMZBS-NEXT: sw a0, 512(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: andi t3, s11, 2
-; RV32IMZBS-NEXT: andi t4, s11, 1
-; RV32IMZBS-NEXT: andi s2, s11, 4
-; RV32IMZBS-NEXT: andi a0, s11, 8
-; RV32IMZBS-NEXT: andi a1, s11, 16
-; RV32IMZBS-NEXT: andi a2, s11, 32
-; RV32IMZBS-NEXT: andi a3, s11, 64
-; RV32IMZBS-NEXT: andi a4, s11, 128
-; RV32IMZBS-NEXT: andi a5, s11, 256
-; RV32IMZBS-NEXT: andi a6, s11, 512
-; RV32IMZBS-NEXT: andi s11, s11, 1024
-; RV32IMZBS-NEXT: mul s3, t5, t3
-; RV32IMZBS-NEXT: mul s1, t5, t4
-; RV32IMZBS-NEXT: mul s2, t5, s2
-; RV32IMZBS-NEXT: mul t6, t5, a0
-; RV32IMZBS-NEXT: mul t4, t5, a1
-; RV32IMZBS-NEXT: mul t3, t5, a2
-; RV32IMZBS-NEXT: mul a0, t5, a3
-; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t5, a4
+; RV32IMZBS-NEXT: andi a0, s2, 64
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t2, 6
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 564(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t2, t5, a5
-; RV32IMZBS-NEXT: mul t1, t5, a6
-; RV32IMZBS-NEXT: mul a0, t5, s11
+; RV32IMZBS-NEXT: andi a0, s2, 128
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t2, 7
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 540(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: andi a0, s2, 256
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t2, 8
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 536(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t5, s4
-; RV32IMZBS-NEXT: sw a0, 552(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t5, a7
-; RV32IMZBS-NEXT: sw a0, 560(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul t0, t5, t0
-; RV32IMZBS-NEXT: lw a0, 740(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a7, t5, a0
-; RV32IMZBS-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s11, t5, a0
-; RV32IMZBS-NEXT: lw a0, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t5, a0
+; RV32IMZBS-NEXT: andi a0, s2, 512
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t2, 9
+; RV32IMZBS-NEXT: and a0, a0, a1
; RV32IMZBS-NEXT: sw a0, 544(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 556(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t5, a0
-; RV32IMZBS-NEXT: sw a0, 556(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t5, s0
-; RV32IMZBS-NEXT: sw a0, 740(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 548(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s0, t5, a0
-; RV32IMZBS-NEXT: mul s5, t5, s5
-; RV32IMZBS-NEXT: mul s6, t5, s6
-; RV32IMZBS-NEXT: mul a0, t5, s7
-; RV32IMZBS-NEXT: sw a0, 532(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul a0, t5, s8
-; RV32IMZBS-NEXT: sw a0, 548(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: mul s9, t5, s9
-; RV32IMZBS-NEXT: mul s10, t5, s10
-; RV32IMZBS-NEXT: mul s4, t5, ra
-; RV32IMZBS-NEXT: lw a0, 528(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s7, t5, a0
-; RV32IMZBS-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul s8, t5, a0
-; RV32IMZBS-NEXT: lw a0, 520(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul ra, t5, a0
-; RV32IMZBS-NEXT: lw a0, 516(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t5, a0
-; RV32IMZBS-NEXT: sw a0, 580(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 512(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: mul a0, t5, a0
-; RV32IMZBS-NEXT: sw a0, 568(sp) # 4-byte Folded Spill
-; RV32IMZBS-NEXT: lw a0, 628(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 624(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a1, a0
-; RV32IMZBS-NEXT: lw a0, 620(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a1, 616(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: andi a0, s2, 1024
+; RV32IMZBS-NEXT: seqz a0, a0
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: slli a1, t2, 10
+; RV32IMZBS-NEXT: and a0, a0, a1
+; RV32IMZBS-NEXT: sw a0, 572(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: not a0, s2
+; RV32IMZBS-NEXT: bexti a1, a0, 11
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t2, 11
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 516(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 12
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t2, 12
+; RV32IMZBS-NEXT: and s9, a1, a2
+; RV32IMZBS-NEXT: bexti a1, a0, 13
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t2, 13
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 524(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 14
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t2, 14
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 548(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 15
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t2, 15
+; RV32IMZBS-NEXT: and a1, a1, a2
+; RV32IMZBS-NEXT: sw a1, 552(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 16
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a3, t2, 16
+; RV32IMZBS-NEXT: and s5, a1, a3
+; RV32IMZBS-NEXT: bexti a1, a0, 17
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a2, t2, 17
+; RV32IMZBS-NEXT: and s3, a1, a2
+; RV32IMZBS-NEXT: bexti a1, a0, 18
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, t2, 18
+; RV32IMZBS-NEXT: and s10, a1, a4
+; RV32IMZBS-NEXT: bexti a1, a0, 19
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, t2, 19
+; RV32IMZBS-NEXT: and a1, a1, a4
+; RV32IMZBS-NEXT: sw a1, 520(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 20
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli s0, t2, 20
+; RV32IMZBS-NEXT: and a1, a1, s0
+; RV32IMZBS-NEXT: sw a1, 528(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 21
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli s1, t2, 21
+; RV32IMZBS-NEXT: and a1, a1, s1
+; RV32IMZBS-NEXT: sw a1, 532(sp) # 4-byte Folded Spill
+; RV32IMZBS-NEXT: bexti a1, a0, 22
+; RV32IMZBS-NEXT: addi a1, a1, -1
+; RV32IMZBS-NEXT: slli a4, t2, 22
+; RV32IMZBS-NEXT: and s0, a1, a4
+; RV32IMZBS-NEXT: bexti a2, a0, 23
+; RV32IMZBS-NEXT: addi a2, a2, -1
+; RV32IMZBS-NEXT: slli a4, t2, 23
+; RV32IMZBS-NEXT: and t6, a2, a4
+; RV32IMZBS-NEXT: bexti a3, a0, 24
+; RV32IMZBS-NEXT: addi a3, a3, -1
+; RV32IMZBS-NEXT: slli a4, t2, 24
+; RV32IMZBS-NEXT: and s1, a3, a4
+; RV32IMZBS-NEXT: bexti a4, a0, 25
+; RV32IMZBS-NEXT: addi a4, a4, -1
+; RV32IMZBS-NEXT: slli t0, t2, 25
+; RV32IMZBS-NEXT: and s7, a4, t0
+; RV32IMZBS-NEXT: bexti t0, a0, 26
+; RV32IMZBS-NEXT: addi t0, t0, -1
+; RV32IMZBS-NEXT: slli t3, t2, 26
+; RV32IMZBS-NEXT: and s8, t0, t3
+; RV32IMZBS-NEXT: bexti t3, a0, 27
+; RV32IMZBS-NEXT: addi t3, t3, -1
+; RV32IMZBS-NEXT: slli t5, t2, 27
+; RV32IMZBS-NEXT: and s11, t3, t5
+; RV32IMZBS-NEXT: bexti t5, a0, 28
+; RV32IMZBS-NEXT: addi t5, t5, -1
+; RV32IMZBS-NEXT: slli a6, t2, 28
+; RV32IMZBS-NEXT: and t5, t5, a6
+; RV32IMZBS-NEXT: bexti a6, a0, 29
+; RV32IMZBS-NEXT: addi a6, a6, -1
+; RV32IMZBS-NEXT: slli t1, t2, 29
+; RV32IMZBS-NEXT: and t4, a6, t1
+; RV32IMZBS-NEXT: andi t1, s2, 1
+; RV32IMZBS-NEXT: seqz t1, t1
+; RV32IMZBS-NEXT: addi t1, t1, -1
+; RV32IMZBS-NEXT: and t1, t1, t2
+; RV32IMZBS-NEXT: slli t2, t2, 30
+; RV32IMZBS-NEXT: bexti a0, a0, 30
+; RV32IMZBS-NEXT: addi a0, a0, -1
+; RV32IMZBS-NEXT: and t3, a0, t2
+; RV32IMZBS-NEXT: lw a0, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 584(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, a1, a0
+; RV32IMZBS-NEXT: lw a0, 688(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 676(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s2, a0, a1
+; RV32IMZBS-NEXT: lw a0, 672(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 664(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a0, a1
+; RV32IMZBS-NEXT: xor a7, a5, a7
+; RV32IMZBS-NEXT: lw a0, 636(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 632(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, a0, a1
+; RV32IMZBS-NEXT: lw a0, 612(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a3, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a0, a3
+; RV32IMZBS-NEXT: lw a0, 600(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a4, 596(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a0, a4
+; RV32IMZBS-NEXT: lw a0, 592(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a5, 588(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, a0, a5
+; RV32IMZBS-NEXT: lw a0, 580(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, t1, a0
+; RV32IMZBS-NEXT: lw a0, 576(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 568(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: lw a1, 612(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a2, 608(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a1, 560(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw a2, 556(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 604(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a3, 600(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a3
-; RV32IMZBS-NEXT: lw a3, 596(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a4, 592(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a4
-; RV32IMZBS-NEXT: lw a4, 588(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw a5, 584(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a5
-; RV32IMZBS-NEXT: lw a5, 576(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: lw t5, 572(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, t5
-; RV32IMZBS-NEXT: xor s1, s1, s3
-; RV32IMZBS-NEXT: xor t6, s2, t6
+; RV32IMZBS-NEXT: lw a2, 540(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw ra, 536(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, ra
+; RV32IMZBS-NEXT: lw ra, 516(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor s9, ra, s9
+; RV32IMZBS-NEXT: xor s3, s5, s3
+; RV32IMZBS-NEXT: xor t6, s0, t6
; RV32IMZBS-NEXT: xor t3, t4, t3
-; RV32IMZBS-NEXT: xor t1, t2, t1
-; RV32IMZBS-NEXT: xor a7, t0, a7
-; RV32IMZBS-NEXT: xor t0, s0, s5
-; RV32IMZBS-NEXT: xor t2, s9, s10
-; RV32IMZBS-NEXT: lw s0, 704(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli t4, s0, 4
-; RV32IMZBS-NEXT: lw s3, 728(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and s0, s0, s3
-; RV32IMZBS-NEXT: and t4, t4, s3
-; RV32IMZBS-NEXT: slli s0, s0, 4
-; RV32IMZBS-NEXT: or t4, t4, s0
-; RV32IMZBS-NEXT: lw s2, 716(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: srli s0, s2, 4
+; RV32IMZBS-NEXT: xor t2, t2, s2
+; RV32IMZBS-NEXT: lw t4, 684(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a6, a6, t4
+; RV32IMZBS-NEXT: lw t4, 660(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, a7, t4
+; RV32IMZBS-NEXT: lw t4, 644(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t0, t4
+; RV32IMZBS-NEXT: lw t4, 620(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t4, a3, t4
+; RV32IMZBS-NEXT: lw a3, 604(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a3
+; RV32IMZBS-NEXT: lw a3, 700(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a5, a3
+; RV32IMZBS-NEXT: xor a5, t1, a0
+; RV32IMZBS-NEXT: lw a0, 564(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a1, a1, a0
+; RV32IMZBS-NEXT: lw a0, 544(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a0
+; RV32IMZBS-NEXT: lw a0, 524(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, s9, a0
+; RV32IMZBS-NEXT: xor s0, s3, s10
+; RV32IMZBS-NEXT: xor t6, t6, s1
+; RV32IMZBS-NEXT: lw a0, 708(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a0, t3, a0
+; RV32IMZBS-NEXT: lw s1, 704(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli t3, s1, 4
+; RV32IMZBS-NEXT: lw s3, 744(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and s1, s1, s3
+; RV32IMZBS-NEXT: and t3, t3, s3
+; RV32IMZBS-NEXT: slli s1, s1, 4
+; RV32IMZBS-NEXT: or t3, t3, s1
+; RV32IMZBS-NEXT: lw s2, 712(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: srli s1, s2, 4
; RV32IMZBS-NEXT: and s2, s2, s3
-; RV32IMZBS-NEXT: and s0, s0, s3
+; RV32IMZBS-NEXT: and s1, s1, s3
; RV32IMZBS-NEXT: slli s2, s2, 4
-; RV32IMZBS-NEXT: or s0, s0, s2
-; RV32IMZBS-NEXT: xor a0, a6, a0
-; RV32IMZBS-NEXT: lw a6, 656(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a6
-; RV32IMZBS-NEXT: lw a6, 648(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, a6
-; RV32IMZBS-NEXT: lw a6, 640(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, a6
-; RV32IMZBS-NEXT: lw a6, 636(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, a6
-; RV32IMZBS-NEXT: lw a6, 632(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, a6
-; RV32IMZBS-NEXT: xor a6, s1, t6
-; RV32IMZBS-NEXT: lw t5, 540(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t3, t3, t5
-; RV32IMZBS-NEXT: lw t5, 536(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t1, t1, t5
-; RV32IMZBS-NEXT: xor a7, a7, s11
-; RV32IMZBS-NEXT: xor t0, t0, s6
-; RV32IMZBS-NEXT: xor t2, t2, s4
-; RV32IMZBS-NEXT: xor a0, a0, a1
-; RV32IMZBS-NEXT: lw a1, 672(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a2, a1
-; RV32IMZBS-NEXT: lw a2, 664(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a3, a2
-; RV32IMZBS-NEXT: lw a3, 652(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a4, a3
-; RV32IMZBS-NEXT: lw a4, 644(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a5, a4
-; RV32IMZBS-NEXT: xor a5, a6, t3
-; RV32IMZBS-NEXT: lw a6, 552(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, t1, a6
-; RV32IMZBS-NEXT: lw t1, 544(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, t1
-; RV32IMZBS-NEXT: lw t1, 532(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor t0, t0, t1
-; RV32IMZBS-NEXT: xor t1, t2, s7
+; RV32IMZBS-NEXT: or s1, s1, s2
+; RV32IMZBS-NEXT: xor a6, t2, a6
; RV32IMZBS-NEXT: lw t2, 696(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, t2
-; RV32IMZBS-NEXT: lw t2, 688(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, t2
-; RV32IMZBS-NEXT: lw t2, 684(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t2
-; RV32IMZBS-NEXT: lw t2, 668(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a3, a3, t2
-; RV32IMZBS-NEXT: lw t2, 660(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a4, a4, t2
-; RV32IMZBS-NEXT: lw t2, 564(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a5, a5, t2
-; RV32IMZBS-NEXT: lw t2, 560(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, t2
-; RV32IMZBS-NEXT: lw t2, 556(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a7, a7, t2
-; RV32IMZBS-NEXT: lw t2, 548(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t2, 668(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor t0, t0, t2
-; RV32IMZBS-NEXT: xor t1, t1, s8
-; RV32IMZBS-NEXT: srli t2, t4, 2
-; RV32IMZBS-NEXT: lw t6, 732(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and t3, t4, t6
-; RV32IMZBS-NEXT: and t2, t2, t6
-; RV32IMZBS-NEXT: slli t3, t3, 2
-; RV32IMZBS-NEXT: or t2, t2, t3
-; RV32IMZBS-NEXT: srli t3, s0, 2
-; RV32IMZBS-NEXT: and t4, s0, t6
-; RV32IMZBS-NEXT: and t3, t3, t6
-; RV32IMZBS-NEXT: slli t4, t4, 2
-; RV32IMZBS-NEXT: or t3, t3, t4
-; RV32IMZBS-NEXT: lw t4, 712(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a2, t4
-; RV32IMZBS-NEXT: lw t4, 692(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: lw t2, 648(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t2, t4, t2
+; RV32IMZBS-NEXT: lw t4, 616(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a4, a4, t4
-; RV32IMZBS-NEXT: lw t4, 740(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a7, t4
-; RV32IMZBS-NEXT: xor t1, t1, ra
-; RV32IMZBS-NEXT: xor a1, a0, a1
+; RV32IMZBS-NEXT: xor a1, a5, a1
+; RV32IMZBS-NEXT: lw a5, 572(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a2, a5
+; RV32IMZBS-NEXT: lw a5, 548(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, t1, a5
+; RV32IMZBS-NEXT: lw t1, 520(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t1, s0, t1
+; RV32IMZBS-NEXT: xor t4, t6, s7
+; RV32IMZBS-NEXT: xor a6, a6, a7
+; RV32IMZBS-NEXT: lw a7, 680(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, t0, a7
+; RV32IMZBS-NEXT: lw t0, 652(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor t0, t2, t0
+; RV32IMZBS-NEXT: lw t2, 624(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, t2
; RV32IMZBS-NEXT: xor a1, a1, a2
-; RV32IMZBS-NEXT: lw a2, 700(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a2, a4, a2
-; RV32IMZBS-NEXT: xor a4, a5, a6
-; RV32IMZBS-NEXT: xor a4, a4, a7
-; RV32IMZBS-NEXT: lw a6, 580(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, t1, a6
-; RV32IMZBS-NEXT: xor a1, a1, a3
-; RV32IMZBS-NEXT: lw a3, 708(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a7, a2, a3
+; RV32IMZBS-NEXT: lw a2, 552(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a5, a2
+; RV32IMZBS-NEXT: lw a5, 528(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a5, t1, a5
+; RV32IMZBS-NEXT: xor t1, t4, s8
+; RV32IMZBS-NEXT: xor a6, a6, a7
+; RV32IMZBS-NEXT: lw a7, 656(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a7, t0, a7
+; RV32IMZBS-NEXT: lw t0, 628(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: xor a4, a4, t0
-; RV32IMZBS-NEXT: slli a0, a0, 24
-; RV32IMZBS-NEXT: lw a2, 568(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a6, a6, a2
-; RV32IMZBS-NEXT: srli a2, t2, 1
-; RV32IMZBS-NEXT: lw t1, 736(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and a3, t2, t1
-; RV32IMZBS-NEXT: and a2, a2, t1
-; RV32IMZBS-NEXT: slli a3, a3, 1
-; RV32IMZBS-NEXT: or a2, a2, a3
-; RV32IMZBS-NEXT: srli a3, t3, 1
-; RV32IMZBS-NEXT: and t0, t3, t1
-; RV32IMZBS-NEXT: and a3, a3, t1
-; RV32IMZBS-NEXT: mv t2, t1
-; RV32IMZBS-NEXT: slli t0, t0, 1
-; RV32IMZBS-NEXT: or a3, a3, t0
-; RV32IMZBS-NEXT: lw t1, 744(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: and t0, a1, t1
-; RV32IMZBS-NEXT: slli t0, t0, 8
-; RV32IMZBS-NEXT: or a0, a0, t0
-; RV32IMZBS-NEXT: slli a5, a5, 24
-; RV32IMZBS-NEXT: xor a7, a1, a7
-; RV32IMZBS-NEXT: srli a1, a1, 8
-; RV32IMZBS-NEXT: and a1, a1, t1
-; RV32IMZBS-NEXT: srli a7, a7, 24
-; RV32IMZBS-NEXT: or a1, a1, a7
-; RV32IMZBS-NEXT: and a7, a4, t1
-; RV32IMZBS-NEXT: slli a7, a7, 8
-; RV32IMZBS-NEXT: or a5, a5, a7
-; RV32IMZBS-NEXT: xor a6, a4, a6
-; RV32IMZBS-NEXT: srli a4, a4, 8
-; RV32IMZBS-NEXT: and a4, a4, t1
-; RV32IMZBS-NEXT: srli a6, a6, 24
-; RV32IMZBS-NEXT: or a4, a4, a6
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: lw a2, 532(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a2, a5, a2
+; RV32IMZBS-NEXT: xor a5, t1, s11
+; RV32IMZBS-NEXT: srli t0, t3, 2
+; RV32IMZBS-NEXT: and t1, t3, s6
+; RV32IMZBS-NEXT: and t0, t0, s6
+; RV32IMZBS-NEXT: slli t1, t1, 2
+; RV32IMZBS-NEXT: or t0, t0, t1
+; RV32IMZBS-NEXT: srli t1, s1, 2
+; RV32IMZBS-NEXT: and t2, s1, s6
+; RV32IMZBS-NEXT: and t1, t1, s6
+; RV32IMZBS-NEXT: slli t2, t2, 2
+; RV32IMZBS-NEXT: or t1, t1, t2
+; RV32IMZBS-NEXT: xor a6, a6, a7
+; RV32IMZBS-NEXT: lw a7, 640(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a4, a4, a7
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: xor a2, a5, t5
+; RV32IMZBS-NEXT: xor a4, a6, a4
+; RV32IMZBS-NEXT: xor a1, a1, a2
+; RV32IMZBS-NEXT: srli a2, t0, 1
+; RV32IMZBS-NEXT: and a5, t0, s4
+; RV32IMZBS-NEXT: xor a3, a4, a3
+; RV32IMZBS-NEXT: srli a4, t1, 1
+; RV32IMZBS-NEXT: and a6, t1, s4
+; RV32IMZBS-NEXT: and a2, a2, s4
+; RV32IMZBS-NEXT: slli a5, a5, 1
+; RV32IMZBS-NEXT: and a4, a4, s4
+; RV32IMZBS-NEXT: slli a6, a6, 1
+; RV32IMZBS-NEXT: xor a7, a1, a0
+; RV32IMZBS-NEXT: or a1, a2, a5
+; RV32IMZBS-NEXT: or a0, a4, a6
+; RV32IMZBS-NEXT: srli a2, a3, 8
+; RV32IMZBS-NEXT: srli a4, a3, 24
+; RV32IMZBS-NEXT: slli a5, a3, 24
+; RV32IMZBS-NEXT: lw t0, 740(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: and a3, a3, t0
+; RV32IMZBS-NEXT: and a2, a2, t0
+; RV32IMZBS-NEXT: or a2, a2, a4
+; RV32IMZBS-NEXT: srli a4, a7, 8
+; RV32IMZBS-NEXT: slli a3, a3, 8
+; RV32IMZBS-NEXT: or a3, a5, a3
+; RV32IMZBS-NEXT: srli a5, a7, 24
+; RV32IMZBS-NEXT: and a4, a4, t0
+; RV32IMZBS-NEXT: or a4, a4, a5
+; RV32IMZBS-NEXT: slli a5, a7, 24
+; RV32IMZBS-NEXT: and a6, a7, t0
+; RV32IMZBS-NEXT: slli a6, a6, 8
+; RV32IMZBS-NEXT: or a5, a5, a6
; RV32IMZBS-NEXT: lui a6, 349525
; RV32IMZBS-NEXT: addi a6, a6, 1364
-; RV32IMZBS-NEXT: or a0, a0, a1
+; RV32IMZBS-NEXT: or a2, a3, a2
; RV32IMZBS-NEXT: or a4, a5, a4
-; RV32IMZBS-NEXT: srli a1, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, s3
+; RV32IMZBS-NEXT: srli a3, a2, 4
+; RV32IMZBS-NEXT: and a2, a2, s3
; RV32IMZBS-NEXT: srli a5, a4, 4
; RV32IMZBS-NEXT: and a4, a4, s3
-; RV32IMZBS-NEXT: and a1, a1, s3
-; RV32IMZBS-NEXT: slli a0, a0, 4
+; RV32IMZBS-NEXT: and a3, a3, s3
+; RV32IMZBS-NEXT: slli a2, a2, 4
; RV32IMZBS-NEXT: and a5, a5, s3
; RV32IMZBS-NEXT: slli a4, a4, 4
-; RV32IMZBS-NEXT: or a0, a1, a0
+; RV32IMZBS-NEXT: or a2, a3, a2
; RV32IMZBS-NEXT: or a4, a5, a4
-; RV32IMZBS-NEXT: srli a1, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, t6
+; RV32IMZBS-NEXT: srli a3, a2, 2
+; RV32IMZBS-NEXT: and a2, a2, s6
; RV32IMZBS-NEXT: srli a5, a4, 2
-; RV32IMZBS-NEXT: and a4, a4, t6
-; RV32IMZBS-NEXT: and a1, a1, t6
-; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: and a5, a5, t6
+; RV32IMZBS-NEXT: and a4, a4, s6
+; RV32IMZBS-NEXT: and a3, a3, s6
+; RV32IMZBS-NEXT: slli a2, a2, 2
+; RV32IMZBS-NEXT: and a5, a5, s6
; RV32IMZBS-NEXT: slli a4, a4, 2
-; RV32IMZBS-NEXT: or a0, a1, a0
+; RV32IMZBS-NEXT: or a2, a3, a2
; RV32IMZBS-NEXT: or a4, a5, a4
-; RV32IMZBS-NEXT: srli a1, a0, 1
+; RV32IMZBS-NEXT: srli a3, a2, 1
; RV32IMZBS-NEXT: srli a5, a4, 1
-; RV32IMZBS-NEXT: and a1, a1, a6
+; RV32IMZBS-NEXT: and a3, a3, a6
; RV32IMZBS-NEXT: and a5, a5, a6
-; RV32IMZBS-NEXT: and a0, a0, t2
-; RV32IMZBS-NEXT: slli a0, a0, 1
-; RV32IMZBS-NEXT: or a0, a1, a0
-; RV32IMZBS-NEXT: and a1, a4, t2
-; RV32IMZBS-NEXT: slli a1, a1, 1
-; RV32IMZBS-NEXT: or a1, a5, a1
-; RV32IMZBS-NEXT: srli a0, a0, 1
-; RV32IMZBS-NEXT: srli a1, a1, 1
-; RV32IMZBS-NEXT: lw a4, 724(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a0, a0, a4
+; RV32IMZBS-NEXT: and a2, a2, s4
+; RV32IMZBS-NEXT: slli a2, a2, 1
+; RV32IMZBS-NEXT: or a2, a3, a2
+; RV32IMZBS-NEXT: and a3, a4, s4
+; RV32IMZBS-NEXT: slli a3, a3, 1
+; RV32IMZBS-NEXT: or a3, a5, a3
+; RV32IMZBS-NEXT: srli a2, a2, 1
+; RV32IMZBS-NEXT: srli a3, a3, 1
; RV32IMZBS-NEXT: lw a4, 720(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: xor a1, a1, a4
-; RV32IMZBS-NEXT: srli a4, a0, 8
-; RV32IMZBS-NEXT: srli a5, a0, 24
-; RV32IMZBS-NEXT: slli a6, a0, 24
-; RV32IMZBS-NEXT: and a0, a0, t1
-; RV32IMZBS-NEXT: and a4, a4, t1
+; RV32IMZBS-NEXT: xor a2, a2, a4
+; RV32IMZBS-NEXT: lw a4, 716(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: xor a3, a3, a4
+; RV32IMZBS-NEXT: srli a4, a2, 8
+; RV32IMZBS-NEXT: srli a5, a2, 24
+; RV32IMZBS-NEXT: slli a6, a2, 24
+; RV32IMZBS-NEXT: and a2, a2, t0
+; RV32IMZBS-NEXT: and a4, a4, t0
; RV32IMZBS-NEXT: or a4, a4, a5
-; RV32IMZBS-NEXT: srli a5, a1, 8
-; RV32IMZBS-NEXT: slli a0, a0, 8
-; RV32IMZBS-NEXT: or a0, a6, a0
-; RV32IMZBS-NEXT: srli a6, a1, 24
-; RV32IMZBS-NEXT: and a5, a5, t1
+; RV32IMZBS-NEXT: srli a5, a3, 8
+; RV32IMZBS-NEXT: slli a2, a2, 8
+; RV32IMZBS-NEXT: or a2, a6, a2
+; RV32IMZBS-NEXT: srli a6, a3, 24
+; RV32IMZBS-NEXT: and a5, a5, t0
; RV32IMZBS-NEXT: or a5, a5, a6
-; RV32IMZBS-NEXT: and a6, a1, t1
-; RV32IMZBS-NEXT: slli a1, a1, 24
+; RV32IMZBS-NEXT: and a6, a3, t0
+; RV32IMZBS-NEXT: slli a3, a3, 24
; RV32IMZBS-NEXT: slli a6, a6, 8
-; RV32IMZBS-NEXT: or a1, a1, a6
-; RV32IMZBS-NEXT: or a0, a0, a4
-; RV32IMZBS-NEXT: or a1, a1, a5
-; RV32IMZBS-NEXT: srli a4, a0, 4
-; RV32IMZBS-NEXT: and a0, a0, s3
-; RV32IMZBS-NEXT: srli a5, a1, 4
-; RV32IMZBS-NEXT: and a1, a1, s3
+; RV32IMZBS-NEXT: or a3, a3, a6
+; RV32IMZBS-NEXT: or a2, a2, a4
+; RV32IMZBS-NEXT: or a3, a3, a5
+; RV32IMZBS-NEXT: srli a4, a2, 4
+; RV32IMZBS-NEXT: and a2, a2, s3
+; RV32IMZBS-NEXT: srli a5, a3, 4
+; RV32IMZBS-NEXT: and a3, a3, s3
; RV32IMZBS-NEXT: and a4, a4, s3
; RV32IMZBS-NEXT: and a5, a5, s3
-; RV32IMZBS-NEXT: slli a0, a0, 4
-; RV32IMZBS-NEXT: slli a1, a1, 4
-; RV32IMZBS-NEXT: or a0, a4, a0
-; RV32IMZBS-NEXT: or a1, a5, a1
-; RV32IMZBS-NEXT: srli a4, a0, 2
-; RV32IMZBS-NEXT: and a0, a0, t6
-; RV32IMZBS-NEXT: srli a5, a1, 2
-; RV32IMZBS-NEXT: and a1, a1, t6
-; RV32IMZBS-NEXT: and a4, a4, t6
-; RV32IMZBS-NEXT: and a5, a5, t6
-; RV32IMZBS-NEXT: slli a0, a0, 2
-; RV32IMZBS-NEXT: slli a1, a1, 2
-; RV32IMZBS-NEXT: or a0, a4, a0
-; RV32IMZBS-NEXT: or a1, a5, a1
-; RV32IMZBS-NEXT: srli a4, a0, 1
-; RV32IMZBS-NEXT: and a0, a0, t2
-; RV32IMZBS-NEXT: srli a5, a1, 1
-; RV32IMZBS-NEXT: and a1, a1, t2
-; RV32IMZBS-NEXT: and a4, a4, t2
-; RV32IMZBS-NEXT: and a5, a5, t2
-; RV32IMZBS-NEXT: slli a0, a0, 1
-; RV32IMZBS-NEXT: or a0, a4, a0
-; RV32IMZBS-NEXT: slli a1, a1, 1
-; RV32IMZBS-NEXT: or a1, a5, a1
-; RV32IMZBS-NEXT: lw a4, 676(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: sw a0, 0(a4)
-; RV32IMZBS-NEXT: sw a2, 4(a4)
-; RV32IMZBS-NEXT: sw a1, 8(a4)
-; RV32IMZBS-NEXT: sw a3, 12(a4)
-; RV32IMZBS-NEXT: lw a4, 680(sp) # 4-byte Folded Reload
-; RV32IMZBS-NEXT: sw a0, 0(a4)
-; RV32IMZBS-NEXT: sw a2, 4(a4)
-; RV32IMZBS-NEXT: sw a1, 8(a4)
-; RV32IMZBS-NEXT: sw a3, 12(a4)
+; RV32IMZBS-NEXT: slli a2, a2, 4
+; RV32IMZBS-NEXT: slli a3, a3, 4
+; RV32IMZBS-NEXT: or a2, a4, a2
+; RV32IMZBS-NEXT: or a3, a5, a3
+; RV32IMZBS-NEXT: srli a4, a2, 2
+; RV32IMZBS-NEXT: and a2, a2, s6
+; RV32IMZBS-NEXT: srli a5, a3, 2
+; RV32IMZBS-NEXT: and a3, a3, s6
+; RV32IMZBS-NEXT: and a4, a4, s6
+; RV32IMZBS-NEXT: and a5, a5, s6
+; RV32IMZBS-NEXT: slli a2, a2, 2
+; RV32IMZBS-NEXT: slli a3, a3, 2
+; RV32IMZBS-NEXT: or a2, a4, a2
+; RV32IMZBS-NEXT: or a3, a5, a3
+; RV32IMZBS-NEXT: srli a4, a2, 1
+; RV32IMZBS-NEXT: and a2, a2, s4
+; RV32IMZBS-NEXT: srli a5, a3, 1
+; RV32IMZBS-NEXT: and a3, a3, s4
+; RV32IMZBS-NEXT: and a4, a4, s4
+; RV32IMZBS-NEXT: and a5, a5, s4
+; RV32IMZBS-NEXT: slli a2, a2, 1
+; RV32IMZBS-NEXT: or a2, a4, a2
+; RV32IMZBS-NEXT: slli a3, a3, 1
+; RV32IMZBS-NEXT: or a3, a5, a3
+; RV32IMZBS-NEXT: lw a4, 732(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a2, 0(a4)
+; RV32IMZBS-NEXT: sw a1, 4(a4)
+; RV32IMZBS-NEXT: sw a3, 8(a4)
+; RV32IMZBS-NEXT: sw a0, 12(a4)
+; RV32IMZBS-NEXT: lw a4, 736(sp) # 4-byte Folded Reload
+; RV32IMZBS-NEXT: sw a2, 0(a4)
+; RV32IMZBS-NEXT: sw a1, 4(a4)
+; RV32IMZBS-NEXT: sw a3, 8(a4)
+; RV32IMZBS-NEXT: sw a0, 12(a4)
; RV32IMZBS-NEXT: lw ra, 796(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw s0, 792(sp) # 4-byte Folded Reload
; RV32IMZBS-NEXT: lw s1, 788(sp) # 4-byte Folded Reload
@@ -12809,1237 +12021,1186 @@ define void @commutative_clmulr_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %
;
; RV64IMZBS-LABEL: commutative_clmulr_v2i64:
; RV64IMZBS: # %bb.0:
-; RV64IMZBS-NEXT: addi sp, sp, -1216
-; RV64IMZBS-NEXT: sd ra, 1208(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s0, 1200(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s1, 1192(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s2, 1184(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s3, 1176(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s4, 1168(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s5, 1160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s6, 1152(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s7, 1144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s8, 1136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s9, 1128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s10, 1120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd s11, 1112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd a5, 1008(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: sd a4, 1000(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv s0, a0
-; RV64IMZBS-NEXT: srli a4, a2, 24
+; RV64IMZBS-NEXT: addi sp, sp, -1088
+; RV64IMZBS-NEXT: sd ra, 1080(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s0, 1072(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s1, 1064(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s2, 1056(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s3, 1048(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s4, 1040(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s5, 1032(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s6, 1024(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s7, 1016(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s8, 1008(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s9, 1000(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s10, 992(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd s11, 984(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd a5, 976(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sd a4, 968(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: mv t3, a3
+; RV64IMZBS-NEXT: mv t5, a0
+; RV64IMZBS-NEXT: srli a5, a2, 24
; RV64IMZBS-NEXT: lui s9, 4080
-; RV64IMZBS-NEXT: srli a5, a2, 8
-; RV64IMZBS-NEXT: li t4, 255
-; RV64IMZBS-NEXT: srli a7, a2, 40
-; RV64IMZBS-NEXT: lui s10, 16
+; RV64IMZBS-NEXT: srli a7, a2, 8
+; RV64IMZBS-NEXT: li s8, 255
+; RV64IMZBS-NEXT: srli a4, a2, 40
+; RV64IMZBS-NEXT: lui a6, 16
; RV64IMZBS-NEXT: srli t0, a2, 56
; RV64IMZBS-NEXT: srliw t1, a2, 24
; RV64IMZBS-NEXT: slli a0, a2, 56
-; RV64IMZBS-NEXT: sd a0, 1096(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui t5, 61681
-; RV64IMZBS-NEXT: lui s11, 209715
-; RV64IMZBS-NEXT: lui s2, 349525
-; RV64IMZBS-NEXT: srli s1, s0, 24
-; RV64IMZBS-NEXT: srli s8, s0, 8
-; RV64IMZBS-NEXT: srli a6, s0, 40
-; RV64IMZBS-NEXT: srli t3, s0, 56
-; RV64IMZBS-NEXT: srliw t2, s0, 24
-; RV64IMZBS-NEXT: slli s5, s0, 56
-; RV64IMZBS-NEXT: srli s4, a3, 24
-; RV64IMZBS-NEXT: srli s7, a3, 8
-; RV64IMZBS-NEXT: srli s3, a3, 40
-; RV64IMZBS-NEXT: srli s6, a3, 56
-; RV64IMZBS-NEXT: and a4, a4, s9
-; RV64IMZBS-NEXT: slli t6, t4, 24
-; RV64IMZBS-NEXT: and a5, a5, t6
-; RV64IMZBS-NEXT: or a5, a5, a4
-; RV64IMZBS-NEXT: addi a0, s10, -256
-; RV64IMZBS-NEXT: and a4, a7, a0
-; RV64IMZBS-NEXT: or a7, a4, t0
+; RV64IMZBS-NEXT: sd a0, 920(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: lui s10, 61681
+; RV64IMZBS-NEXT: lui s1, 209715
+; RV64IMZBS-NEXT: lui s4, 349525
+; RV64IMZBS-NEXT: srli t2, t5, 24
+; RV64IMZBS-NEXT: srli t6, t5, 8
+; RV64IMZBS-NEXT: srli s5, t5, 40
+; RV64IMZBS-NEXT: srli s2, t5, 56
+; RV64IMZBS-NEXT: srliw s3, t5, 24
+; RV64IMZBS-NEXT: slli a0, t5, 56
+; RV64IMZBS-NEXT: sd a0, 912(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: srli s7, a3, 24
+; RV64IMZBS-NEXT: srli a3, a3, 8
+; RV64IMZBS-NEXT: srli s6, t3, 40
+; RV64IMZBS-NEXT: srli s11, t3, 56
+; RV64IMZBS-NEXT: and a5, a5, s9
+; RV64IMZBS-NEXT: slli s8, s8, 24
+; RV64IMZBS-NEXT: and a7, a7, s8
+; RV64IMZBS-NEXT: or a5, a7, a5
+; RV64IMZBS-NEXT: srliw s0, t3, 24
+; RV64IMZBS-NEXT: addi a0, a6, -256
+; RV64IMZBS-NEXT: and a4, a4, a0
+; RV64IMZBS-NEXT: or a6, a4, t0
; RV64IMZBS-NEXT: and a4, a2, s9
; RV64IMZBS-NEXT: slli t1, t1, 32
-; RV64IMZBS-NEXT: addi t0, t5, -241
-; RV64IMZBS-NEXT: addi t4, s11, 819
-; RV64IMZBS-NEXT: addi t5, s2, 1365
+; RV64IMZBS-NEXT: addi s10, s10, -241
+; RV64IMZBS-NEXT: addi ra, s1, 819
+; RV64IMZBS-NEXT: addi a7, s4, 1365
; RV64IMZBS-NEXT: slli a4, a4, 24
; RV64IMZBS-NEXT: or a4, a4, t1
-; RV64IMZBS-NEXT: slli t1, t0, 32
-; RV64IMZBS-NEXT: add ra, t0, t1
-; RV64IMZBS-NEXT: slli t0, t4, 32
-; RV64IMZBS-NEXT: add s11, t4, t0
-; RV64IMZBS-NEXT: slli t0, t5, 32
-; RV64IMZBS-NEXT: add s10, t5, t0
-; RV64IMZBS-NEXT: srliw t1, a3, 24
-; RV64IMZBS-NEXT: and t0, s1, s9
-; RV64IMZBS-NEXT: and t4, s8, t6
-; RV64IMZBS-NEXT: or t0, t4, t0
-; RV64IMZBS-NEXT: srli t4, a1, 24
-; RV64IMZBS-NEXT: and a6, a6, a0
-; RV64IMZBS-NEXT: or t3, a6, t3
-; RV64IMZBS-NEXT: and a6, s0, s9
-; RV64IMZBS-NEXT: slli t2, t2, 32
-; RV64IMZBS-NEXT: slli a6, a6, 24
-; RV64IMZBS-NEXT: or a6, a6, t2
-; RV64IMZBS-NEXT: srli t2, a1, 8
-; RV64IMZBS-NEXT: and t5, s4, s9
-; RV64IMZBS-NEXT: mv s4, t6
-; RV64IMZBS-NEXT: sd t6, 992(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and t6, s7, t6
-; RV64IMZBS-NEXT: or t5, t6, t5
-; RV64IMZBS-NEXT: srli t6, a1, 40
-; RV64IMZBS-NEXT: and s1, s3, a0
-; RV64IMZBS-NEXT: or s1, s1, s6
-; RV64IMZBS-NEXT: and s2, a3, s9
-; RV64IMZBS-NEXT: slli t1, t1, 32
-; RV64IMZBS-NEXT: slli s2, s2, 24
-; RV64IMZBS-NEXT: or t1, s2, t1
-; RV64IMZBS-NEXT: srli s2, a1, 56
-; RV64IMZBS-NEXT: and t4, t4, s9
-; RV64IMZBS-NEXT: and t2, t2, s4
-; RV64IMZBS-NEXT: or t2, t2, t4
-; RV64IMZBS-NEXT: srliw t4, a1, 24
-; RV64IMZBS-NEXT: and t6, t6, a0
-; RV64IMZBS-NEXT: or t6, t6, s2
-; RV64IMZBS-NEXT: and s2, a1, s9
-; RV64IMZBS-NEXT: slli t4, t4, 32
-; RV64IMZBS-NEXT: slli s2, s2, 24
-; RV64IMZBS-NEXT: or t4, s2, t4
-; RV64IMZBS-NEXT: bseti s2, zero, 11
-; RV64IMZBS-NEXT: sd s2, 1104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: or a5, a5, a7
-; RV64IMZBS-NEXT: mv s2, a0
-; RV64IMZBS-NEXT: sd a0, 976(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli t0, s10, 32
+; RV64IMZBS-NEXT: add s10, s10, t0
+; RV64IMZBS-NEXT: slli t0, ra, 32
+; RV64IMZBS-NEXT: add ra, ra, t0
+; RV64IMZBS-NEXT: slli t0, a7, 32
+; RV64IMZBS-NEXT: add t4, a7, t0
+; RV64IMZBS-NEXT: slli a7, t3, 56
+; RV64IMZBS-NEXT: and t0, t2, s9
+; RV64IMZBS-NEXT: and t1, t6, s8
+; RV64IMZBS-NEXT: or t0, t1, t0
+; RV64IMZBS-NEXT: srli t1, a1, 24
+; RV64IMZBS-NEXT: and t2, s5, a0
+; RV64IMZBS-NEXT: or t2, t2, s2
+; RV64IMZBS-NEXT: lui s5, 4080
+; RV64IMZBS-NEXT: and t6, t5, s5
+; RV64IMZBS-NEXT: slli s3, s3, 32
+; RV64IMZBS-NEXT: slli t6, t6, 24
+; RV64IMZBS-NEXT: or t6, t6, s3
+; RV64IMZBS-NEXT: srli s1, a1, 8
+; RV64IMZBS-NEXT: and s2, s7, s5
+; RV64IMZBS-NEXT: sd s8, 960(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: and a3, a3, s8
+; RV64IMZBS-NEXT: or a3, a3, s2
+; RV64IMZBS-NEXT: srli s2, a1, 40
+; RV64IMZBS-NEXT: and s3, s6, a0
+; RV64IMZBS-NEXT: or s3, s3, s11
+; RV64IMZBS-NEXT: and s4, t3, s5
+; RV64IMZBS-NEXT: slli s0, s0, 32
+; RV64IMZBS-NEXT: slli s4, s4, 24
+; RV64IMZBS-NEXT: or s0, s4, s0
+; RV64IMZBS-NEXT: srli s4, a1, 56
+; RV64IMZBS-NEXT: and t1, t1, s5
+; RV64IMZBS-NEXT: and s1, s1, s8
+; RV64IMZBS-NEXT: or t1, s1, t1
+; RV64IMZBS-NEXT: srliw s1, a1, 24
+; RV64IMZBS-NEXT: and s2, s2, a0
+; RV64IMZBS-NEXT: or s2, s2, s4
+; RV64IMZBS-NEXT: and s4, a1, s5
+; RV64IMZBS-NEXT: slli s1, s1, 32
+; RV64IMZBS-NEXT: slli s4, s4, 24
+; RV64IMZBS-NEXT: or s1, s4, s1
+; RV64IMZBS-NEXT: slli s4, a1, 56
+; RV64IMZBS-NEXT: mv s5, a0
+; RV64IMZBS-NEXT: sd a0, 936(sp) # 8-byte Folded Spill
; RV64IMZBS-NEXT: and a2, a2, a0
+; RV64IMZBS-NEXT: and a0, t5, a0
+; RV64IMZBS-NEXT: and t3, t3, s5
+; RV64IMZBS-NEXT: and a1, a1, s5
; RV64IMZBS-NEXT: slli a2, a2, 40
-; RV64IMZBS-NEXT: ld a0, 1096(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: or a2, a0, a2
-; RV64IMZBS-NEXT: or a7, t0, t3
-; RV64IMZBS-NEXT: and a0, s0, s2
; RV64IMZBS-NEXT: slli a0, a0, 40
-; RV64IMZBS-NEXT: or a0, s5, a0
-; RV64IMZBS-NEXT: or t0, t5, s1
-; RV64IMZBS-NEXT: slli t3, a3, 56
-; RV64IMZBS-NEXT: and a3, a3, s2
-; RV64IMZBS-NEXT: slli a3, a3, 40
-; RV64IMZBS-NEXT: or a3, t3, a3
-; RV64IMZBS-NEXT: or t2, t2, t6
-; RV64IMZBS-NEXT: slli t3, a1, 56
-; RV64IMZBS-NEXT: and a1, a1, s2
+; RV64IMZBS-NEXT: slli t3, t3, 40
; RV64IMZBS-NEXT: slli a1, a1, 40
-; RV64IMZBS-NEXT: or a1, t3, a1
+; RV64IMZBS-NEXT: or a5, a5, a6
+; RV64IMZBS-NEXT: ld a6, 920(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: or a2, a6, a2
+; RV64IMZBS-NEXT: or a6, t0, t2
+; RV64IMZBS-NEXT: ld t0, 912(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: or a0, t0, a0
+; RV64IMZBS-NEXT: or a3, a3, s3
+; RV64IMZBS-NEXT: or a7, a7, t3
+; RV64IMZBS-NEXT: or t0, t1, s2
+; RV64IMZBS-NEXT: or a1, s4, a1
; RV64IMZBS-NEXT: or a2, a2, a4
-; RV64IMZBS-NEXT: or a0, a0, a6
-; RV64IMZBS-NEXT: or a3, a3, t1
-; RV64IMZBS-NEXT: lui s1, 512
-; RV64IMZBS-NEXT: or a1, a1, t4
+; RV64IMZBS-NEXT: or a0, a0, t6
+; RV64IMZBS-NEXT: or a4, a7, s0
+; RV64IMZBS-NEXT: or a1, a1, s1
; RV64IMZBS-NEXT: or a2, a2, a5
-; RV64IMZBS-NEXT: or a0, a0, a7
-; RV64IMZBS-NEXT: or a3, a3, t0
-; RV64IMZBS-NEXT: or a1, a1, t2
+; RV64IMZBS-NEXT: or a0, a0, a6
+; RV64IMZBS-NEXT: or a3, a4, a3
+; RV64IMZBS-NEXT: or a1, a1, t0
; RV64IMZBS-NEXT: srli a4, a2, 4
-; RV64IMZBS-NEXT: sd ra, 984(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a2, ra
-; RV64IMZBS-NEXT: and a4, a4, ra
+; RV64IMZBS-NEXT: sd s10, 952(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: and a2, a2, s10
+; RV64IMZBS-NEXT: srli a5, a0, 4
+; RV64IMZBS-NEXT: and a0, a0, s10
+; RV64IMZBS-NEXT: srli a6, a3, 4
+; RV64IMZBS-NEXT: and a3, a3, s10
+; RV64IMZBS-NEXT: srli a7, a1, 4
+; RV64IMZBS-NEXT: and a1, a1, s10
+; RV64IMZBS-NEXT: and a4, a4, s10
; RV64IMZBS-NEXT: slli a2, a2, 4
+; RV64IMZBS-NEXT: and a5, a5, s10
+; RV64IMZBS-NEXT: slli a0, a0, 4
+; RV64IMZBS-NEXT: and a6, a6, s10
+; RV64IMZBS-NEXT: slli a3, a3, 4
+; RV64IMZBS-NEXT: and a7, a7, s10
+; RV64IMZBS-NEXT: slli a1, a1, 4
; RV64IMZBS-NEXT: or a2, a4, a2
-; RV64IMZBS-NEXT: srli a4, a0, 4
+; RV64IMZBS-NEXT: or a0, a5, a0
+; RV64IMZBS-NEXT: or a3, a6, a3
+; RV64IMZBS-NEXT: or a1, a7, a1
+; RV64IMZBS-NEXT: srli a4, a2, 2
+; RV64IMZBS-NEXT: sd ra, 944(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: and a2, a2, ra
+; RV64IMZBS-NEXT: srli a5, a0, 2
; RV64IMZBS-NEXT: and a0, a0, ra
-; RV64IMZBS-NEXT: and a4, a4, ra
-; RV64IMZBS-NEXT: slli a0, a0, 4
-; RV64IMZBS-NEXT: or a0, a4, a0
-; RV64IMZBS-NEXT: srli a4, a3, 4
+; RV64IMZBS-NEXT: srli a6, a3, 2
; RV64IMZBS-NEXT: and a3, a3, ra
-; RV64IMZBS-NEXT: and a4, a4, ra
-; RV64IMZBS-NEXT: slli a3, a3, 4
-; RV64IMZBS-NEXT: or a3, a4, a3
-; RV64IMZBS-NEXT: srli a4, a1, 4
+; RV64IMZBS-NEXT: srli a7, a1, 2
; RV64IMZBS-NEXT: and a1, a1, ra
; RV64IMZBS-NEXT: and a4, a4, ra
-; RV64IMZBS-NEXT: slli a1, a1, 4
-; RV64IMZBS-NEXT: or a1, a4, a1
-; RV64IMZBS-NEXT: srli a4, a2, 2
-; RV64IMZBS-NEXT: sd s11, 968(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a2, s11
-; RV64IMZBS-NEXT: and a4, a4, s11
; RV64IMZBS-NEXT: slli a2, a2, 2
-; RV64IMZBS-NEXT: or a2, a4, a2
-; RV64IMZBS-NEXT: srli a4, a0, 2
-; RV64IMZBS-NEXT: and a0, a0, s11
-; RV64IMZBS-NEXT: and a4, a4, s11
+; RV64IMZBS-NEXT: and a5, a5, ra
; RV64IMZBS-NEXT: slli a0, a0, 2
-; RV64IMZBS-NEXT: or a0, a4, a0
-; RV64IMZBS-NEXT: srli a4, a3, 2
-; RV64IMZBS-NEXT: and a3, a3, s11
-; RV64IMZBS-NEXT: and a4, a4, s11
+; RV64IMZBS-NEXT: and a6, a6, ra
; RV64IMZBS-NEXT: slli a3, a3, 2
-; RV64IMZBS-NEXT: or a3, a4, a3
-; RV64IMZBS-NEXT: srli a4, a1, 2
-; RV64IMZBS-NEXT: and a1, a1, s11
-; RV64IMZBS-NEXT: and a4, a4, s11
+; RV64IMZBS-NEXT: and a7, a7, ra
; RV64IMZBS-NEXT: slli a1, a1, 2
-; RV64IMZBS-NEXT: or a5, a4, a1
-; RV64IMZBS-NEXT: srli a1, a2, 1
-; RV64IMZBS-NEXT: sd s10, 960(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a2, a2, s10
-; RV64IMZBS-NEXT: and a1, a1, s10
-; RV64IMZBS-NEXT: slli a2, a2, 1
-; RV64IMZBS-NEXT: or s5, a1, a2
-; RV64IMZBS-NEXT: srli a1, a0, 1
-; RV64IMZBS-NEXT: and a0, a0, s10
-; RV64IMZBS-NEXT: and a1, a1, s10
-; RV64IMZBS-NEXT: slli a0, a0, 1
-; RV64IMZBS-NEXT: or a0, a1, a0
-; RV64IMZBS-NEXT: srli a1, a3, 1
-; RV64IMZBS-NEXT: and a2, a3, s10
-; RV64IMZBS-NEXT: and a4, a1, s10
+; RV64IMZBS-NEXT: or a2, a4, a2
+; RV64IMZBS-NEXT: or a0, a5, a0
+; RV64IMZBS-NEXT: or a3, a6, a3
+; RV64IMZBS-NEXT: or a1, a7, a1
+; RV64IMZBS-NEXT: srli a4, a2, 1
+; RV64IMZBS-NEXT: sd t4, 928(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: and a2, a2, t4
+; RV64IMZBS-NEXT: srli a5, a0, 1
+; RV64IMZBS-NEXT: and a0, a0, t4
+; RV64IMZBS-NEXT: srli a6, a3, 1
+; RV64IMZBS-NEXT: and a3, a3, t4
+; RV64IMZBS-NEXT: srli a7, a1, 1
+; RV64IMZBS-NEXT: and a1, a1, t4
+; RV64IMZBS-NEXT: and a4, a4, t4
; RV64IMZBS-NEXT: slli a2, a2, 1
-; RV64IMZBS-NEXT: or a4, a4, a2
-; RV64IMZBS-NEXT: srli a2, a5, 1
-; RV64IMZBS-NEXT: and a3, a5, s10
-; RV64IMZBS-NEXT: and a2, a2, s10
+; RV64IMZBS-NEXT: and a5, a5, t4
+; RV64IMZBS-NEXT: slli t0, a0, 1
+; RV64IMZBS-NEXT: and a6, a6, t4
; RV64IMZBS-NEXT: slli a3, a3, 1
-; RV64IMZBS-NEXT: or s11, a2, a3
-; RV64IMZBS-NEXT: bseti a2, zero, 31
-; RV64IMZBS-NEXT: sd a2, 1024(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 32
-; RV64IMZBS-NEXT: sd a2, 1032(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 33
-; RV64IMZBS-NEXT: sd a2, 1040(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 34
-; RV64IMZBS-NEXT: sd a2, 1048(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 35
-; RV64IMZBS-NEXT: sd a2, 1056(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 36
-; RV64IMZBS-NEXT: sd a2, 1064(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 37
-; RV64IMZBS-NEXT: sd a2, 1072(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 38
-; RV64IMZBS-NEXT: sd a2, 1080(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 39
-; RV64IMZBS-NEXT: sd a2, 1088(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a2, zero, 40
-; RV64IMZBS-NEXT: sd a2, 1096(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a1, zero, 41
-; RV64IMZBS-NEXT: sd a1, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti ra, zero, 42
-; RV64IMZBS-NEXT: bseti s10, zero, 43
-; RV64IMZBS-NEXT: bseti s9, zero, 44
-; RV64IMZBS-NEXT: bseti s8, zero, 45
-; RV64IMZBS-NEXT: bseti s7, zero, 46
-; RV64IMZBS-NEXT: bseti s6, zero, 47
-; RV64IMZBS-NEXT: bseti s4, zero, 48
-; RV64IMZBS-NEXT: bseti s3, zero, 49
-; RV64IMZBS-NEXT: bseti s2, zero, 50
-; RV64IMZBS-NEXT: bseti s0, zero, 51
-; RV64IMZBS-NEXT: bseti t6, zero, 52
-; RV64IMZBS-NEXT: bseti t5, zero, 53
-; RV64IMZBS-NEXT: bseti t4, zero, 54
-; RV64IMZBS-NEXT: bseti t3, zero, 55
-; RV64IMZBS-NEXT: sd t3, 432(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti t2, zero, 56
-; RV64IMZBS-NEXT: sd t2, 440(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti t1, zero, 57
-; RV64IMZBS-NEXT: bseti t0, zero, 58
-; RV64IMZBS-NEXT: bseti a7, zero, 59
-; RV64IMZBS-NEXT: bseti a6, zero, 60
-; RV64IMZBS-NEXT: bseti a1, zero, 61
-; RV64IMZBS-NEXT: sd a1, 1016(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: bseti a3, zero, 62
-; RV64IMZBS-NEXT: bseti a2, zero, 63
-; RV64IMZBS-NEXT: andi a5, a0, 2
-; RV64IMZBS-NEXT: sd a5, 952(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a5, a0, 1
-; RV64IMZBS-NEXT: sd a5, 944(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a5, a0, 4
-; RV64IMZBS-NEXT: sd a5, 936(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a5, a0, 8
-; RV64IMZBS-NEXT: sd a5, 928(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a5, a0, 16
-; RV64IMZBS-NEXT: sd a5, 920(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a5, a0, 32
-; RV64IMZBS-NEXT: sd a5, 912(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a5, a0, 64
-; RV64IMZBS-NEXT: sd a5, 904(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a5, a0, 128
-; RV64IMZBS-NEXT: sd a5, 896(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a5, a0, 256
-; RV64IMZBS-NEXT: sd a5, 888(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a5, a0, 512
-; RV64IMZBS-NEXT: sd a5, 880(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a5, a0, 1024
-; RV64IMZBS-NEXT: sd a5, 872(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 864(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 1
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 856(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 2
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 848(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 4
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 840(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 8
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 832(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 16
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 824(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 32
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 816(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 64
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 808(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 128
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 792(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 256
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 784(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s1, a0, s1
-; RV64IMZBS-NEXT: sd s1, 768(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui s1, 1024
-; RV64IMZBS-NEXT: and s1, a0, s1
-; RV64IMZBS-NEXT: lui a5, 2048
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 760(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 4096
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 752(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 8192
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 744(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 16384
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 728(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 32768
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 712(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 65536
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 696(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 131072
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 680(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a5, 262144
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 656(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1024(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 624(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1032(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 608(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1040(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 592(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1048(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 552(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1056(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 528(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1064(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 520(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1072(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 512(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1080(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 504(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1088(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 496(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 1096(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a5, a0, a5
-; RV64IMZBS-NEXT: sd a5, 464(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a5, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, a0, a5
-; RV64IMZBS-NEXT: sd a1, 456(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, ra
-; RV64IMZBS-NEXT: sd a1, 424(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, s10
-; RV64IMZBS-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, s9
-; RV64IMZBS-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, s8
-; RV64IMZBS-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, s7
-; RV64IMZBS-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, s6
-; RV64IMZBS-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, s4
-; RV64IMZBS-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, s3
-; RV64IMZBS-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, s2
-; RV64IMZBS-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, s0
-; RV64IMZBS-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, t6
-; RV64IMZBS-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, t5
-; RV64IMZBS-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, t4
-; RV64IMZBS-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, t3
-; RV64IMZBS-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, t2
-; RV64IMZBS-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, t1
-; RV64IMZBS-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv t2, t1
-; RV64IMZBS-NEXT: and a1, a0, t0
-; RV64IMZBS-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv t1, t0
-; RV64IMZBS-NEXT: and a1, a0, a7
-; RV64IMZBS-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv t0, a7
-; RV64IMZBS-NEXT: and a1, a0, a6
-; RV64IMZBS-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv a7, a6
-; RV64IMZBS-NEXT: ld a1, 1016(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, a0, a1
-; RV64IMZBS-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, a0, a3
-; RV64IMZBS-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mv a6, a3
-; RV64IMZBS-NEXT: and a1, a0, a2
-; RV64IMZBS-NEXT: mv a3, a2
-; RV64IMZBS-NEXT: ld a2, 952(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, s5, a2
-; RV64IMZBS-NEXT: sd t3, 664(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 488(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 936(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 480(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 472(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 448(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 912(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, s5, a2
-; RV64IMZBS-NEXT: sd t3, 648(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 904(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, s5, a2
-; RV64IMZBS-NEXT: sd t3, 736(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 896(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, s5, a2
-; RV64IMZBS-NEXT: sd t3, 904(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, s5, a2
-; RV64IMZBS-NEXT: sd t3, 640(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 880(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, s5, a2
-; RV64IMZBS-NEXT: sd t3, 632(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 872(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, s5, a2
-; RV64IMZBS-NEXT: sd t3, 720(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 864(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, s5, a2
-; RV64IMZBS-NEXT: sd t3, 800(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 856(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul t3, s5, a2
-; RV64IMZBS-NEXT: sd t3, 880(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: and a7, a7, t4
+; RV64IMZBS-NEXT: slli a0, a1, 1
+; RV64IMZBS-NEXT: or t4, a4, a2
+; RV64IMZBS-NEXT: or t5, a5, t0
+; RV64IMZBS-NEXT: srli a2, t0, 63
+; RV64IMZBS-NEXT: or t0, a6, a3
+; RV64IMZBS-NEXT: or s5, a7, a0
+; RV64IMZBS-NEXT: srli a0, a0, 63
+; RV64IMZBS-NEXT: slli a3, t4, 1
+; RV64IMZBS-NEXT: andi a4, t5, 2
+; RV64IMZBS-NEXT: slli a5, t4, 2
+; RV64IMZBS-NEXT: andi a6, t5, 4
+; RV64IMZBS-NEXT: slli a7, t4, 3
+; RV64IMZBS-NEXT: andi t1, t5, 8
+; RV64IMZBS-NEXT: slli t2, t4, 4
+; RV64IMZBS-NEXT: andi t3, t5, 16
+; RV64IMZBS-NEXT: slli t6, t4, 5
+; RV64IMZBS-NEXT: andi s0, t5, 32
+; RV64IMZBS-NEXT: slli s1, t4, 6
+; RV64IMZBS-NEXT: andi s2, t5, 64
+; RV64IMZBS-NEXT: slli s3, t4, 7
+; RV64IMZBS-NEXT: slli s4, t4, 63
+; RV64IMZBS-NEXT: seqz a2, a2
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: and a1, a2, s4
+; RV64IMZBS-NEXT: sd a1, 912(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a2, t0, 63
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 920(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t5, 128
+; RV64IMZBS-NEXT: seqz a2, a4
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 880(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a2, t4, 8
+; RV64IMZBS-NEXT: seqz a3, a6
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, a5
+; RV64IMZBS-NEXT: sd a3, 864(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a3, t5, 256
+; RV64IMZBS-NEXT: seqz a4, t1
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: and a1, a4, a7
+; RV64IMZBS-NEXT: sd a1, 848(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a4, t4, 9
+; RV64IMZBS-NEXT: seqz a5, t3
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: and t2, a5, t2
+; RV64IMZBS-NEXT: andi a5, t5, 512
+; RV64IMZBS-NEXT: seqz a6, s0
+; RV64IMZBS-NEXT: addi a6, a6, -1
+; RV64IMZBS-NEXT: and t3, a6, t6
+; RV64IMZBS-NEXT: slli a6, t4, 10
+; RV64IMZBS-NEXT: seqz a7, s2
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: and a1, a7, s1
+; RV64IMZBS-NEXT: sd a1, 888(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a7, t5, 1024
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and t6, a0, s3
+; RV64IMZBS-NEXT: slli a0, t4, 11
+; RV64IMZBS-NEXT: seqz a3, a3
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and s2, a3, a2
+; RV64IMZBS-NEXT: not a2, t5
+; RV64IMZBS-NEXT: seqz a3, a5
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: and a3, a3, a4
+; RV64IMZBS-NEXT: sd a3, 840(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: slli a3, t4, 12
+; RV64IMZBS-NEXT: seqz a4, a7
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: and a1, a4, a6
+; RV64IMZBS-NEXT: sd a1, 904(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a4, a2, 11
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: and a6, a4, a0
+; RV64IMZBS-NEXT: bexti a0, a2, 12
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 776(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 13
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 13
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 816(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 14
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 14
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 856(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 15
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 15
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 896(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 16
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 16
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 736(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 17
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 17
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 728(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 18
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 18
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 768(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 19
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 19
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 800(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 20
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 20
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 832(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 21
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 21
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 872(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 22
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 22
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 680(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 23
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 23
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 664(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 24
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 24
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 712(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 25
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 25
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 752(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 26
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 26
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 792(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 27
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 27
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 808(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 28
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 28
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 824(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 29
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 29
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 608(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 30
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 30
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 600(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sraiw a0, t5, 31
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 31
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 648(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 32
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 32
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 672(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 33
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 33
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 704(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 34
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 34
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 744(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 35
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 35
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 760(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 36
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 36
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 784(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 37
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 37
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 552(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 38
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 38
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 544(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 39
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 39
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 568(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 40
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 40
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 592(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 41
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 41
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 640(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 42
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 42
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 656(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 43
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 43
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 688(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 44
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 44
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 696(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 45
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 45
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 720(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 46
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 46
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 496(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 47
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 47
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 488(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 48
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 48
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 520(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 49
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 49
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 536(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 50
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 50
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 560(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 51
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 51
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 576(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 52
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 52
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 584(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 53
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 53
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 616(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 54
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 54
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 624(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 55
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 55
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 632(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 56
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 56
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 464(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 57
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 57
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 456(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 58
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 58
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 472(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 59
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 59
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 480(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 60
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 60
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 504(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a0, a2, 61
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a3, t4, 61
+; RV64IMZBS-NEXT: and a0, a0, a3
+; RV64IMZBS-NEXT: sd a0, 512(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, t5, 1
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and t5, a0, t4
+; RV64IMZBS-NEXT: slli t4, t4, 62
+; RV64IMZBS-NEXT: bexti a0, a2, 62
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and a0, a0, t4
+; RV64IMZBS-NEXT: sd a0, 528(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 2
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 1
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 448(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 4
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 2
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 432(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 8
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 3
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 408(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 16
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 4
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 400(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 32
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 5
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 376(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 64
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 6
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 440(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 128
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 7
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 360(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 256
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 8
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 344(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 512
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 9
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 392(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a0, s5, 1024
+; RV64IMZBS-NEXT: seqz a0, a0
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: slli a2, t0, 10
+; RV64IMZBS-NEXT: and a0, a0, a2
+; RV64IMZBS-NEXT: sd a0, 424(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: not a0, s5
+; RV64IMZBS-NEXT: bexti a2, a0, 11
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 11
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 304(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 12
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 12
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 296(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 13
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 13
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 328(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 14
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 14
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 368(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 15
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 15
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 416(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 16
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 16
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 248(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 17
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 17
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 232(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 18
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 18
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 280(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 19
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 19
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 320(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 20
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 20
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 352(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 21
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 21
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 384(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 22
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 22
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 192(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 23
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 23
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 168(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 24
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 24
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 224(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 25
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 25
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 26
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 26
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 288(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 27
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 27
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 312(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 28
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 28
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 336(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 29
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 29
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 136(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 30
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 30
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 112(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: sraiw a2, s5, 31
+; RV64IMZBS-NEXT: seqz a2, a2
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 31
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 152(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 32
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 32
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 184(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 33
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 33
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 34
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 34
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 240(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 35
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 35
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 36
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 36
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 37
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 37
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 56(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 38
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 38
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 48(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 39
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 39
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 72(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 40
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 40
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 96(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 41
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 41
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 144(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 42
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 42
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 160(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 43
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 43
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 176(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 44
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 44
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 45
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 45
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 46
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 46
+; RV64IMZBS-NEXT: and s11, a2, a3
+; RV64IMZBS-NEXT: bexti a2, a0, 47
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a4, t0, 47
+; RV64IMZBS-NEXT: and s9, a2, a4
+; RV64IMZBS-NEXT: bexti a2, a0, 48
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 48
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 49
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 49
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 40(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 50
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 50
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 64(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 51
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli s0, t0, 51
+; RV64IMZBS-NEXT: and a2, a2, s0
+; RV64IMZBS-NEXT: sd a2, 80(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 52
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli s1, t0, 52
+; RV64IMZBS-NEXT: and a2, a2, s1
+; RV64IMZBS-NEXT: sd a2, 88(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 53
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 53
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 104(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 54
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 54
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 128(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 55
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 55
+; RV64IMZBS-NEXT: and a2, a2, a3
+; RV64IMZBS-NEXT: sd a2, 120(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: bexti a2, a0, 56
+; RV64IMZBS-NEXT: addi a2, a2, -1
+; RV64IMZBS-NEXT: slli a3, t0, 56
+; RV64IMZBS-NEXT: and s6, a2, a3
+; RV64IMZBS-NEXT: bexti a3, a0, 57
+; RV64IMZBS-NEXT: addi a3, a3, -1
+; RV64IMZBS-NEXT: slli a4, t0, 57
+; RV64IMZBS-NEXT: and s4, a3, a4
+; RV64IMZBS-NEXT: bexti a4, a0, 58
+; RV64IMZBS-NEXT: addi a4, a4, -1
+; RV64IMZBS-NEXT: slli a5, t0, 58
+; RV64IMZBS-NEXT: and s7, a4, a5
+; RV64IMZBS-NEXT: bexti a5, a0, 59
+; RV64IMZBS-NEXT: addi a5, a5, -1
+; RV64IMZBS-NEXT: slli a7, t0, 59
+; RV64IMZBS-NEXT: and s8, a5, a7
+; RV64IMZBS-NEXT: bexti a7, a0, 60
+; RV64IMZBS-NEXT: addi a7, a7, -1
+; RV64IMZBS-NEXT: slli t1, t0, 60
+; RV64IMZBS-NEXT: and s10, a7, t1
+; RV64IMZBS-NEXT: bexti t1, a0, 61
+; RV64IMZBS-NEXT: addi t1, t1, -1
+; RV64IMZBS-NEXT: slli a1, t0, 61
+; RV64IMZBS-NEXT: and a1, t1, a1
+; RV64IMZBS-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: andi a1, s5, 1
+; RV64IMZBS-NEXT: seqz a1, a1
+; RV64IMZBS-NEXT: addi a1, a1, -1
+; RV64IMZBS-NEXT: and a1, a1, t0
+; RV64IMZBS-NEXT: slli t0, t0, 62
+; RV64IMZBS-NEXT: bexti a0, a0, 62
+; RV64IMZBS-NEXT: addi a0, a0, -1
+; RV64IMZBS-NEXT: and a0, a0, t0
+; RV64IMZBS-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: ld a0, 880(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s3, t5, a0
+; RV64IMZBS-NEXT: ld a0, 864(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: ld a2, 848(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 616(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 840(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 600(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 832(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 704(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 824(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 776(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 816(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 856(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 808(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 928(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 792(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 584(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 784(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 576(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 768(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 688(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a2, s5, s1
-; RV64IMZBS-NEXT: sd a2, 768(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 760(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 832(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 752(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 568(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 744(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 560(sp) # 8-byte Folded Spill
+; RV64IMZBS-NEXT: xor s5, a0, a2
+; RV64IMZBS-NEXT: xor t2, t2, t3
+; RV64IMZBS-NEXT: xor t4, t6, s2
+; RV64IMZBS-NEXT: ld a0, 776(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, a6, a0
+; RV64IMZBS-NEXT: ld a0, 736(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: ld a2, 728(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 672(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 712(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 744(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 696(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 824(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 680(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 888(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 656(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 936(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 624(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 952(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 608(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 544(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 592(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 536(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 552(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 624(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 528(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 712(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 520(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 792(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 512(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 848(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 504(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 912(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 496(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 944(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a2, 464(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a2
-; RV64IMZBS-NEXT: sd a2, 520(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 456(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 512(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 424(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 592(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 416(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 680(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 408(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 760(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 816(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 392(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 872(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 920(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 504(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 496(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 552(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 656(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 728(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 784(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 840(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 896(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 464(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 456(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 528(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 608(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 696(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 752(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a2, s5, a0
-; RV64IMZBS-NEXT: sd a2, 808(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, s5, a1
-; RV64IMZBS-NEXT: sd a1, 864(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and t3, s11, a1
-; RV64IMZBS-NEXT: lui a1, 1
-; RV64IMZBS-NEXT: and s1, s11, a1
-; RV64IMZBS-NEXT: lui a1, 2
-; RV64IMZBS-NEXT: and s5, s11, a1
-; RV64IMZBS-NEXT: lui a1, 4
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 8
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 424(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 16
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 32
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 64
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 128
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 256
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 512
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 1024
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 2048
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 4096
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 8192
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 16384
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 32768
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 65536
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 131072
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: lui a1, 262144
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1024(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1024(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1032(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1032(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1040(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1040(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1048(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1048(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1056(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1056(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1064(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1064(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1072(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1072(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1080(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1080(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1088(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 1088(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1096(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, a5
-; RV64IMZBS-NEXT: sd a1, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, ra
-; RV64IMZBS-NEXT: sd a1, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, s10
-; RV64IMZBS-NEXT: sd a1, 272(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, s9
-; RV64IMZBS-NEXT: sd a1, 264(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s8, s11, s8
-; RV64IMZBS-NEXT: and a1, s11, s7
-; RV64IMZBS-NEXT: sd a1, 248(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, s6
-; RV64IMZBS-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s4, s11, s4
-; RV64IMZBS-NEXT: and s3, s11, s3
-; RV64IMZBS-NEXT: and a1, s11, s2
-; RV64IMZBS-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and s0, s11, s0
-; RV64IMZBS-NEXT: and t6, s11, t6
-; RV64IMZBS-NEXT: and t5, s11, t5
-; RV64IMZBS-NEXT: and t4, s11, t4
-; RV64IMZBS-NEXT: ld a1, 432(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 440(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a1
-; RV64IMZBS-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, t2
-; RV64IMZBS-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, t1
-; RV64IMZBS-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, t0
-; RV64IMZBS-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, a7
-; RV64IMZBS-NEXT: sd a1, 144(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 1016(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a1, s11, a0
-; RV64IMZBS-NEXT: sd a1, 128(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, a6
-; RV64IMZBS-NEXT: sd a1, 120(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: and a1, s11, a3
-; RV64IMZBS-NEXT: sd a1, 96(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: andi a1, s11, 2
-; RV64IMZBS-NEXT: andi a2, s11, 1
-; RV64IMZBS-NEXT: andi a3, s11, 4
-; RV64IMZBS-NEXT: andi a0, s11, 8
-; RV64IMZBS-NEXT: andi a5, s11, 16
-; RV64IMZBS-NEXT: andi a6, s11, 32
-; RV64IMZBS-NEXT: andi a7, s11, 64
-; RV64IMZBS-NEXT: andi t0, s11, 128
-; RV64IMZBS-NEXT: andi t1, s11, 256
-; RV64IMZBS-NEXT: andi t2, s11, 512
-; RV64IMZBS-NEXT: andi s2, s11, 1024
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 112(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a2, a4, a2
-; RV64IMZBS-NEXT: mul a3, a4, a3
-; RV64IMZBS-NEXT: mul a0, a4, a0
-; RV64IMZBS-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, a5
-; RV64IMZBS-NEXT: sd a1, 88(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, a6
-; RV64IMZBS-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, a7
-; RV64IMZBS-NEXT: sd a1, 200(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, t0
-; RV64IMZBS-NEXT: sd a1, 432(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, t1
-; RV64IMZBS-NEXT: sd a1, 72(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, t2
-; RV64IMZBS-NEXT: sd a1, 64(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, s2
-; RV64IMZBS-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, t3
-; RV64IMZBS-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, s1
-; RV64IMZBS-NEXT: sd a1, 256(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, s5
-; RV64IMZBS-NEXT: sd a1, 56(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 48(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 440(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 392(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a0, a4, a1
-; RV64IMZBS-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a0, a4, a1
-; RV64IMZBS-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 136(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 368(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 376(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 408(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a0, a4, a1
-; RV64IMZBS-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a0, a4, a1
-; RV64IMZBS-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 360(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 400(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 424(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 1016(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1024(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 1104(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1032(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s10, a4, a1
-; RV64IMZBS-NEXT: ld a1, 1040(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s9, a4, a1
-; RV64IMZBS-NEXT: ld a1, 1048(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 304(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1056(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 336(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1064(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 392(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1072(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 1048(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1080(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 1080(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 1088(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 1096(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s7, a4, a1
-; RV64IMZBS-NEXT: ld a1, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s6, a4, a1
-; RV64IMZBS-NEXT: ld a1, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a0, a4, a1
-; RV64IMZBS-NEXT: sd a0, 288(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 320(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 264(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 368(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, s8
-; RV64IMZBS-NEXT: sd a1, 1032(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 248(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 1064(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 240(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 1088(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul s5, a4, s4
-; RV64IMZBS-NEXT: mul s4, a4, s3
-; RV64IMZBS-NEXT: ld a1, 232(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a0, a4, a1
-; RV64IMZBS-NEXT: sd a0, 280(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, s0
-; RV64IMZBS-NEXT: sd a1, 312(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, t6
-; RV64IMZBS-NEXT: sd a1, 344(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, t5
-; RV64IMZBS-NEXT: sd a1, 384(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: mul a1, a4, t4
-; RV64IMZBS-NEXT: sd a1, 1040(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 216(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 1072(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s3, a4, a1
-; RV64IMZBS-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s2, a4, a1
-; RV64IMZBS-NEXT: ld a1, 168(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul s8, a4, a1
-; RV64IMZBS-NEXT: ld a1, 152(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a0, a4, a1
-; RV64IMZBS-NEXT: sd a0, 296(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 328(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 352(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a1, a4, a1
-; RV64IMZBS-NEXT: sd a1, 1024(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a1, 96(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: mul a0, a4, a1
-; RV64IMZBS-NEXT: sd a0, 1056(sp) # 8-byte Folded Spill
-; RV64IMZBS-NEXT: ld a0, 664(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 488(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s1, s1, a0
-; RV64IMZBS-NEXT: ld a0, 480(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 472(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, a0, a1
-; RV64IMZBS-NEXT: ld a0, 648(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 448(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, a1, a0
-; RV64IMZBS-NEXT: ld a0, 640(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 632(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, a0, a1
-; RV64IMZBS-NEXT: ld a0, 616(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 600(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, a0, a1
-; RV64IMZBS-NEXT: ld a0, 584(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 576(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, a0, a2
+; RV64IMZBS-NEXT: ld a0, 680(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 664(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor s0, a0, s0
-; RV64IMZBS-NEXT: ld a0, 568(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a5, 560(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a0, a5
-; RV64IMZBS-NEXT: ld a0, 544(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 536(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a0, a1
-; RV64IMZBS-NEXT: ld a0, 520(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 512(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a0, a1
-; RV64IMZBS-NEXT: ld a0, 504(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 496(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, a0, a1
+; RV64IMZBS-NEXT: ld a0, 608(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 600(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, a0, s1
+; RV64IMZBS-NEXT: ld a0, 552(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 544(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, a0, a2
+; RV64IMZBS-NEXT: ld a0, 496(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 488(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, a0, a2
; RV64IMZBS-NEXT: ld a0, 464(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a1, 456(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, a0, a1
-; RV64IMZBS-NEXT: ld a0, 112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, a2, a0
-; RV64IMZBS-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a0, a3, a0
-; RV64IMZBS-NEXT: ld a1, 88(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a2, 80(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 456(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, a0, a2
+; RV64IMZBS-NEXT: ld a0, 448(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, a1, a0
+; RV64IMZBS-NEXT: ld a0, 432(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 408(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 400(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 376(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a1, a1, a2
-; RV64IMZBS-NEXT: ld a2, 72(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a3, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 360(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a3, 344(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 56(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld a4, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a3, 304(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 296(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor a3, a3, a4
-; RV64IMZBS-NEXT: ld a4, 32(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, ra
-; RV64IMZBS-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor ra, ra, s11
-; RV64IMZBS-NEXT: xor s9, s10, s9
-; RV64IMZBS-NEXT: xor s6, s7, s6
-; RV64IMZBS-NEXT: xor s4, s5, s4
-; RV64IMZBS-NEXT: xor s2, s3, s2
-; RV64IMZBS-NEXT: xor t3, s1, t3
-; RV64IMZBS-NEXT: ld s1, 736(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t4, s1
-; RV64IMZBS-NEXT: ld s1, 720(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, t5, s1
-; RV64IMZBS-NEXT: ld s1, 704(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, t6, s1
-; RV64IMZBS-NEXT: ld s1, 688(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s0, s1
-; RV64IMZBS-NEXT: ld s1, 672(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, s1
-; RV64IMZBS-NEXT: ld s1, 624(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, s1
-; RV64IMZBS-NEXT: ld s1, 592(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, s1
-; RV64IMZBS-NEXT: ld s1, 552(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, s1
-; RV64IMZBS-NEXT: ld s1, 528(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t1, s1
-; RV64IMZBS-NEXT: xor a0, t2, a0
-; RV64IMZBS-NEXT: ld t2, 200(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, a1, t2
-; RV64IMZBS-NEXT: ld t2, 192(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, t2
-; RV64IMZBS-NEXT: ld t2, 160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, t2
-; RV64IMZBS-NEXT: ld t2, 136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, t2
-; RV64IMZBS-NEXT: ld t2, 104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, ra, t2
-; RV64IMZBS-NEXT: ld s1, 304(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s1, s9, s1
-; RV64IMZBS-NEXT: ld s3, 288(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s3, s6, s3
-; RV64IMZBS-NEXT: ld s5, 280(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s4, s4, s5
-; RV64IMZBS-NEXT: xor s2, s2, s8
-; RV64IMZBS-NEXT: xor t3, t3, t4
-; RV64IMZBS-NEXT: ld t4, 800(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t5, t4
-; RV64IMZBS-NEXT: ld t5, 776(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, t6, t5
-; RV64IMZBS-NEXT: ld t6, 768(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, s0, t6
-; RV64IMZBS-NEXT: ld s0, 744(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, s0
-; RV64IMZBS-NEXT: ld s0, 712(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, s0
-; RV64IMZBS-NEXT: ld s0, 680(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, s0
-; RV64IMZBS-NEXT: ld s0, 656(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, s0
-; RV64IMZBS-NEXT: ld s0, 608(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t1, s0
+; RV64IMZBS-NEXT: ld a4, 248(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a5, 232(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, a5
+; RV64IMZBS-NEXT: ld a5, 192(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a6, 168(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, a6
+; RV64IMZBS-NEXT: ld a6, 136(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a7, 112(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a6, a7
+; RV64IMZBS-NEXT: ld a7, 56(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld ra, 48(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, a7, ra
+; RV64IMZBS-NEXT: xor s9, s11, s9
+; RV64IMZBS-NEXT: xor s4, s6, s4
+; RV64IMZBS-NEXT: xor s3, s3, s5
+; RV64IMZBS-NEXT: ld s5, 888(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t2, t2, s5
+; RV64IMZBS-NEXT: ld s5, 840(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t4, s5
+; RV64IMZBS-NEXT: ld s5, 816(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, t5, s5
+; RV64IMZBS-NEXT: ld s5, 768(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, t6, s5
+; RV64IMZBS-NEXT: ld s5, 712(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s0, s5
+; RV64IMZBS-NEXT: ld s5, 648(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s1, s5
+; RV64IMZBS-NEXT: ld s5, 568(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, s2, s5
+; RV64IMZBS-NEXT: ld s5, 520(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, s5
+; RV64IMZBS-NEXT: ld s5, 472(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s5
+; RV64IMZBS-NEXT: xor a0, t3, a0
+; RV64IMZBS-NEXT: ld t3, 440(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a1, t3
+; RV64IMZBS-NEXT: ld t3, 392(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a2, t3
+; RV64IMZBS-NEXT: ld t3, 328(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a3, t3
+; RV64IMZBS-NEXT: ld t3, 280(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a4, t3
+; RV64IMZBS-NEXT: ld t3, 224(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a5, t3
+; RV64IMZBS-NEXT: ld t3, 152(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a6, t3
+; RV64IMZBS-NEXT: ld t3, 72(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, a7, t3
+; RV64IMZBS-NEXT: ld t3, 24(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, s9, t3
+; RV64IMZBS-NEXT: xor s4, s4, s7
+; RV64IMZBS-NEXT: xor t2, s3, t2
+; RV64IMZBS-NEXT: ld s3, 904(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t4, s3
+; RV64IMZBS-NEXT: ld s3, 856(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, t5, s3
+; RV64IMZBS-NEXT: ld s3, 800(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t6, t6, s3
+; RV64IMZBS-NEXT: ld s3, 752(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s0, s0, s3
+; RV64IMZBS-NEXT: ld s3, 672(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s1, s3
+; RV64IMZBS-NEXT: ld s3, 592(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s2, s2, s3
+; RV64IMZBS-NEXT: ld s3, 536(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, s3
+; RV64IMZBS-NEXT: ld s3, 480(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s3
; RV64IMZBS-NEXT: xor a0, a0, a1
-; RV64IMZBS-NEXT: ld a1, 224(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, a1
-; RV64IMZBS-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, a1
-; RV64IMZBS-NEXT: ld a1, 376(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, a1
-; RV64IMZBS-NEXT: ld a1, 360(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, t2, a1
-; RV64IMZBS-NEXT: ld s0, 336(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s1, s0
-; RV64IMZBS-NEXT: ld a1, 320(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s1, s3, a1
-; RV64IMZBS-NEXT: ld a1, 312(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s3, s4, a1
-; RV64IMZBS-NEXT: ld a1, 296(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s2, s2, a1
-; RV64IMZBS-NEXT: ld a1, 904(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a1, t3, a1
-; RV64IMZBS-NEXT: ld t3, 880(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t3, t4, t3
-; RV64IMZBS-NEXT: ld t4, 856(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a1, 424(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 368(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 320(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 256(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 184(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: ld a6, 96(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: ld a7, 40(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t3, a7
+; RV64IMZBS-NEXT: xor t3, s4, s8
+; RV64IMZBS-NEXT: xor t2, t2, t4
+; RV64IMZBS-NEXT: ld t4, 896(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t4, t5, t4
; RV64IMZBS-NEXT: ld t5, 832(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t5, t6, t5
-; RV64IMZBS-NEXT: ld t6, 824(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t6
; RV64IMZBS-NEXT: ld t6, 792(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t6
-; RV64IMZBS-NEXT: ld t6, 760(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t6
-; RV64IMZBS-NEXT: ld t6, 728(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, t6
-; RV64IMZBS-NEXT: ld t6, 696(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t1, t6
-; RV64IMZBS-NEXT: ld t6, 432(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a0, a0, t6
-; RV64IMZBS-NEXT: ld t6, 256(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a2, a2, t6
-; RV64IMZBS-NEXT: ld t6, 416(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, t6
-; RV64IMZBS-NEXT: ld t6, 408(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, a4, t6
-; RV64IMZBS-NEXT: ld t6, 400(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, t2, t6
-; RV64IMZBS-NEXT: ld t6, 392(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t6, s0, t6
-; RV64IMZBS-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 704(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor s0, s1, s0
-; RV64IMZBS-NEXT: ld s1, 344(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s1, s3, s1
-; RV64IMZBS-NEXT: ld s3, 328(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s2, s2, s3
-; RV64IMZBS-NEXT: ld s3, 928(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t4, s3
-; RV64IMZBS-NEXT: ld s3, 888(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, s3
-; RV64IMZBS-NEXT: ld s3, 848(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, s3
-; RV64IMZBS-NEXT: ld s3, 816(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, s3
-; RV64IMZBS-NEXT: ld s3, 784(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, s3
-; RV64IMZBS-NEXT: ld s3, 752(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t1, s3
-; RV64IMZBS-NEXT: ld s3, 440(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, s3
-; RV64IMZBS-NEXT: ld s3, 424(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, t2, s3
-; RV64IMZBS-NEXT: ld s3, 1048(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t6, t6, s3
-; RV64IMZBS-NEXT: ld s3, 1032(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s0, s3
-; RV64IMZBS-NEXT: ld s3, 384(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s1, s1, s3
-; RV64IMZBS-NEXT: ld s3, 352(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s2, s2, s3
-; RV64IMZBS-NEXT: xor t3, a1, t3
-; RV64IMZBS-NEXT: xor t3, t3, t4
-; RV64IMZBS-NEXT: ld t4, 936(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t4
-; RV64IMZBS-NEXT: ld t4, 912(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t4
-; RV64IMZBS-NEXT: ld t4, 872(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t4
-; RV64IMZBS-NEXT: ld t4, 840(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t0, t0, t4
+; RV64IMZBS-NEXT: ld s1, 640(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor s1, s2, s1
+; RV64IMZBS-NEXT: ld s2, 560(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, s2
+; RV64IMZBS-NEXT: ld s2, 504(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s2
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 352(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 288(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 216(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 144(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: ld a6, 64(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: xor a7, t3, s10
+; RV64IMZBS-NEXT: xor t2, t2, t4
+; RV64IMZBS-NEXT: ld t3, 872(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t5, t3
; RV64IMZBS-NEXT: ld t4, 808(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t1, t1, t4
-; RV64IMZBS-NEXT: xor a2, a0, a2
-; RV64IMZBS-NEXT: xor a2, a2, a3
-; RV64IMZBS-NEXT: ld a3, 1016(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, t2, a3
-; RV64IMZBS-NEXT: ld t2, 1080(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t2, t6, t2
-; RV64IMZBS-NEXT: ld t4, 1064(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, s0, t4
-; RV64IMZBS-NEXT: ld t6, 1040(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t6, t4
+; RV64IMZBS-NEXT: ld t5, 744(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t5, s0, t5
+; RV64IMZBS-NEXT: ld t6, 656(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t6, s1, t6
-; RV64IMZBS-NEXT: ld s0, 1024(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor s0, s2, s0
-; RV64IMZBS-NEXT: xor t3, t3, t5
-; RV64IMZBS-NEXT: ld t5, 952(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a5, a5, t5
-; RV64IMZBS-NEXT: ld t5, 944(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a6, a6, t5
-; RV64IMZBS-NEXT: ld t5, 920(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a7, a7, t5
-; RV64IMZBS-NEXT: ld t5, 896(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 576(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, s0
+; RV64IMZBS-NEXT: ld s0, 512(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, s0
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 384(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 312(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 240(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 160(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 80(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: ld a6, 16(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, a7, a6
+; RV64IMZBS-NEXT: xor a7, t2, t3
+; RV64IMZBS-NEXT: ld t2, 824(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t2, t4, t2
+; RV64IMZBS-NEXT: ld t3, 760(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t3, t5, t3
+; RV64IMZBS-NEXT: ld t4, 688(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t4, t6, t4
+; RV64IMZBS-NEXT: ld t5, 584(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t0, t0, t5
-; RV64IMZBS-NEXT: ld t5, 864(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld t5, 528(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t1, t1, t5
-; RV64IMZBS-NEXT: xor a2, a2, a4
-; RV64IMZBS-NEXT: ld a4, 1104(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a3, a3, a4
-; RV64IMZBS-NEXT: ld a4, 1096(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor a4, t2, a4
-; RV64IMZBS-NEXT: ld t2, 1088(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 336(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 264(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 88(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: ld a5, 32(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a6, a5
+; RV64IMZBS-NEXT: xor a6, a7, t2
+; RV64IMZBS-NEXT: ld a7, 784(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t3, a7
+; RV64IMZBS-NEXT: ld t2, 696(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: xor t2, t4, t2
-; RV64IMZBS-NEXT: ld t4, 1072(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t4, t6, t4
-; RV64IMZBS-NEXT: ld t5, 1056(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: xor t5, s0, t5
-; RV64IMZBS-NEXT: xor a5, t3, a5
-; RV64IMZBS-NEXT: xor a5, a5, a6
-; RV64IMZBS-NEXT: xor a3, a2, a3
-; RV64IMZBS-NEXT: xor a3, a3, a4
-; RV64IMZBS-NEXT: slli a1, a1, 56
-; RV64IMZBS-NEXT: slli a0, a0, 56
-; RV64IMZBS-NEXT: ld t6, 976(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, t3, t6
-; RV64IMZBS-NEXT: and a2, a2, t6
-; RV64IMZBS-NEXT: slli a4, a4, 40
-; RV64IMZBS-NEXT: slli a2, a2, 40
-; RV64IMZBS-NEXT: or a1, a1, a4
-; RV64IMZBS-NEXT: srli a4, a5, 8
-; RV64IMZBS-NEXT: or a0, a0, a2
-; RV64IMZBS-NEXT: srli a2, a3, 8
-; RV64IMZBS-NEXT: ld a6, 992(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: and a4, a4, a6
-; RV64IMZBS-NEXT: and a2, a2, a6
-; RV64IMZBS-NEXT: xor a6, a5, a7
+; RV64IMZBS-NEXT: ld t3, 616(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t0, t0, t3
+; RV64IMZBS-NEXT: ld t3, 912(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor t1, t1, t3
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 272(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 200(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: ld a3, 104(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a3, a4, a3
+; RV64IMZBS-NEXT: ld a4, 920(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a4, a5, a4
+; RV64IMZBS-NEXT: xor a5, a6, a7
+; RV64IMZBS-NEXT: ld a6, 720(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a6, t2, a6
+; RV64IMZBS-NEXT: ld a7, 624(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a7, t0, a7
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: ld a2, 128(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a2, a3, a2
+; RV64IMZBS-NEXT: xor a3, a5, a6
+; RV64IMZBS-NEXT: ld a5, 632(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a5, a7, a5
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: xor a1, a2, a1
+; RV64IMZBS-NEXT: xor a3, a3, a5
+; RV64IMZBS-NEXT: xor a0, a0, a1
+; RV64IMZBS-NEXT: xor a1, a3, t1
+; RV64IMZBS-NEXT: xor a0, a0, a4
+; RV64IMZBS-NEXT: srli a2, a1, 40
+; RV64IMZBS-NEXT: srli a3, a1, 56
+; RV64IMZBS-NEXT: srli a4, a1, 24
+; RV64IMZBS-NEXT: srli a5, a1, 8
+; RV64IMZBS-NEXT: srliw a6, a1, 24
; RV64IMZBS-NEXT: lui t3, 4080
+; RV64IMZBS-NEXT: and a7, a1, t3
+; RV64IMZBS-NEXT: slli t0, a1, 56
+; RV64IMZBS-NEXT: ld t4, 936(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: and a1, a1, t4
+; RV64IMZBS-NEXT: srli t1, a0, 8
+; RV64IMZBS-NEXT: ld t2, 960(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: and a5, a5, t2
+; RV64IMZBS-NEXT: and t1, t1, t2
+; RV64IMZBS-NEXT: srli t2, a0, 40
+; RV64IMZBS-NEXT: and a2, a2, t4
+; RV64IMZBS-NEXT: or a2, a2, a3
+; RV64IMZBS-NEXT: srli a3, a0, 56
+; RV64IMZBS-NEXT: and a4, a4, t3
+; RV64IMZBS-NEXT: or a4, a5, a4
+; RV64IMZBS-NEXT: srli a5, a0, 24
+; RV64IMZBS-NEXT: slli a6, a6, 32
+; RV64IMZBS-NEXT: slli a7, a7, 24
+; RV64IMZBS-NEXT: or a6, a7, a6
+; RV64IMZBS-NEXT: srliw a7, a0, 24
; RV64IMZBS-NEXT: and a5, a5, t3
-; RV64IMZBS-NEXT: xor a7, a3, t2
-; RV64IMZBS-NEXT: and a3, a3, t3
-; RV64IMZBS-NEXT: xor t0, a6, t0
-; RV64IMZBS-NEXT: srli a6, a6, 24
-; RV64IMZBS-NEXT: xor t2, a7, t4
-; RV64IMZBS-NEXT: srli a7, a7, 24
-; RV64IMZBS-NEXT: and a6, a6, t3
-; RV64IMZBS-NEXT: and a7, a7, t3
-; RV64IMZBS-NEXT: or a4, a4, a6
-; RV64IMZBS-NEXT: srli a6, t0, 40
-; RV64IMZBS-NEXT: or a2, a2, a7
-; RV64IMZBS-NEXT: srli a7, t2, 40
-; RV64IMZBS-NEXT: and a6, a6, t6
-; RV64IMZBS-NEXT: and a7, a7, t6
-; RV64IMZBS-NEXT: slli a5, a5, 24
-; RV64IMZBS-NEXT: xor t1, t0, t1
-; RV64IMZBS-NEXT: srliw t0, t0, 24
-; RV64IMZBS-NEXT: slli t0, t0, 32
-; RV64IMZBS-NEXT: or a5, a5, t0
-; RV64IMZBS-NEXT: srli t0, t1, 56
-; RV64IMZBS-NEXT: or a6, a6, t0
-; RV64IMZBS-NEXT: slli a3, a3, 24
-; RV64IMZBS-NEXT: xor t0, t2, t5
-; RV64IMZBS-NEXT: srliw t1, t2, 24
-; RV64IMZBS-NEXT: slli t1, t1, 32
-; RV64IMZBS-NEXT: or a3, a3, t1
-; RV64IMZBS-NEXT: srli t0, t0, 56
-; RV64IMZBS-NEXT: or a7, a7, t0
-; RV64IMZBS-NEXT: or a1, a1, a5
-; RV64IMZBS-NEXT: or a4, a4, a6
+; RV64IMZBS-NEXT: and t3, a0, t3
+; RV64IMZBS-NEXT: and t2, t2, t4
+; RV64IMZBS-NEXT: and t4, a0, t4
+; RV64IMZBS-NEXT: slli a0, a0, 56
+; RV64IMZBS-NEXT: slli a1, a1, 40
+; RV64IMZBS-NEXT: slli a7, a7, 32
+; RV64IMZBS-NEXT: slli t3, t3, 24
+; RV64IMZBS-NEXT: slli t4, t4, 40
+; RV64IMZBS-NEXT: or a1, t0, a1
+; RV64IMZBS-NEXT: or a3, t2, a3
+; RV64IMZBS-NEXT: or a5, t1, a5
+; RV64IMZBS-NEXT: or a7, t3, a7
+; RV64IMZBS-NEXT: or a0, a0, t4
+; RV64IMZBS-NEXT: or a2, a4, a2
+; RV64IMZBS-NEXT: or a1, a1, a6
+; RV64IMZBS-NEXT: or a3, a5, a3
+; RV64IMZBS-NEXT: or a0, a0, a7
+; RV64IMZBS-NEXT: or a1, a1, a2
; RV64IMZBS-NEXT: or a0, a0, a3
-; RV64IMZBS-NEXT: or a2, a2, a7
-; RV64IMZBS-NEXT: or a1, a1, a4
-; RV64IMZBS-NEXT: or a0, a0, a2
; RV64IMZBS-NEXT: srli a2, a1, 4
-; RV64IMZBS-NEXT: ld a4, 984(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 952(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: and a1, a1, a4
; RV64IMZBS-NEXT: srli a3, a0, 4
; RV64IMZBS-NEXT: and a0, a0, a4
; RV64IMZBS-NEXT: and a2, a2, a4
-; RV64IMZBS-NEXT: and a3, a3, a4
; RV64IMZBS-NEXT: slli a1, a1, 4
+; RV64IMZBS-NEXT: and a3, a3, a4
; RV64IMZBS-NEXT: slli a0, a0, 4
; RV64IMZBS-NEXT: or a1, a2, a1
; RV64IMZBS-NEXT: or a0, a3, a0
; RV64IMZBS-NEXT: srli a2, a1, 2
-; RV64IMZBS-NEXT: ld a4, 968(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 944(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: and a1, a1, a4
; RV64IMZBS-NEXT: srli a3, a0, 2
; RV64IMZBS-NEXT: and a0, a0, a4
; RV64IMZBS-NEXT: and a2, a2, a4
-; RV64IMZBS-NEXT: and a3, a3, a4
; RV64IMZBS-NEXT: slli a1, a1, 2
+; RV64IMZBS-NEXT: and a3, a3, a4
; RV64IMZBS-NEXT: slli a0, a0, 2
; RV64IMZBS-NEXT: or a1, a2, a1
; RV64IMZBS-NEXT: or a0, a3, a0
; RV64IMZBS-NEXT: srli a2, a1, 1
-; RV64IMZBS-NEXT: ld a4, 960(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a4, 928(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: and a1, a1, a4
; RV64IMZBS-NEXT: srli a3, a0, 1
; RV64IMZBS-NEXT: and a0, a0, a4
; RV64IMZBS-NEXT: and a2, a2, a4
-; RV64IMZBS-NEXT: and a3, a3, a4
; RV64IMZBS-NEXT: slli a1, a1, 1
-; RV64IMZBS-NEXT: or a1, a2, a1
+; RV64IMZBS-NEXT: and a3, a3, a4
; RV64IMZBS-NEXT: slli a0, a0, 1
+; RV64IMZBS-NEXT: or a1, a2, a1
; RV64IMZBS-NEXT: or a0, a3, a0
-; RV64IMZBS-NEXT: ld a2, 1000(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 968(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: sd a1, 0(a2)
; RV64IMZBS-NEXT: sd a0, 8(a2)
-; RV64IMZBS-NEXT: ld a2, 1008(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld a2, 976(sp) # 8-byte Folded Reload
; RV64IMZBS-NEXT: sd a1, 0(a2)
; RV64IMZBS-NEXT: sd a0, 8(a2)
-; RV64IMZBS-NEXT: ld ra, 1208(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s0, 1200(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s1, 1192(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s2, 1184(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s3, 1176(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s4, 1168(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s5, 1160(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s6, 1152(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s7, 1144(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s8, 1136(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s9, 1128(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s10, 1120(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: ld s11, 1112(sp) # 8-byte Folded Reload
-; RV64IMZBS-NEXT: addi sp, sp, 1216
+; RV64IMZBS-NEXT: ld ra, 1080(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s0, 1072(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s1, 1064(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s2, 1056(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s3, 1048(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s4, 1040(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s5, 1032(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s6, 1024(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s7, 1016(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s8, 1008(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s9, 1000(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s10, 992(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: ld s11, 984(sp) # 8-byte Folded Reload
+; RV64IMZBS-NEXT: addi sp, sp, 1088
; RV64IMZBS-NEXT: ret
%x.ext = zext <2 x i64> %x to <2 x i128>
%y.ext = zext <2 x i64> %y to <2 x i128>
@@ -14054,5 +13215,6 @@ define void @commutative_clmulr_v2i64(<2 x i64> %x, <2 x i64> %y, ptr %p0, ptr %
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
; CHECK-I: {{.*}}
+; CHECK-M: {{.*}}
+; CHECK-ZBS: {{.*}}
diff --git a/llvm/test/CodeGen/X86/clmul.ll b/llvm/test/CodeGen/X86/clmul.ll
index 42dcb538dd17e..d8f522ae06e62 100644
--- a/llvm/test/CodeGen/X86/clmul.ll
+++ b/llvm/test/CodeGen/X86/clmul.ll
@@ -9,51 +9,50 @@
define i8 @clmul_i8(i8 %a, i8 %b) nounwind {
; SCALAR-LABEL: clmul_i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andb $1, %dl
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andb $2, %cl
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %cl
-; SCALAR-NEXT: movl %eax, %ecx
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %dl
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: xorb %cl, %dl
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andb $4, %cl
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %cl
-; SCALAR-NEXT: movl %eax, %r8d
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andb $8, %cl
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %cl
-; SCALAR-NEXT: movl %eax, %ecx
-; SCALAR-NEXT: xorb %r8b, %cl
-; SCALAR-NEXT: xorb %dl, %cl
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andb $16, %dl
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %dl
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andb $32, %r8b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r8b
-; SCALAR-NEXT: movl %eax, %r8d
-; SCALAR-NEXT: movl %esi, %r9d
-; SCALAR-NEXT: andb $64, %r9b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r9b
-; SCALAR-NEXT: movl %eax, %r9d
-; SCALAR-NEXT: xorb %dl, %r8b
-; SCALAR-NEXT: xorb %r8b, %r9b
-; SCALAR-NEXT: xorb %cl, %r9b
-; SCALAR-NEXT: andb $-128, %sil
+; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi
+; SCALAR-NEXT: xorl %ecx, %ecx
+; SCALAR-NEXT: testb $1, %sil
; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %sil
-; SCALAR-NEXT: xorb %r9b, %al
+; SCALAR-NEXT: cmovel %ecx, %eax
+; SCALAR-NEXT: leal (%rdi,%rdi), %edx
+; SCALAR-NEXT: movzbl %dl, %edx
+; SCALAR-NEXT: testb $2, %sil
+; SCALAR-NEXT: cmovel %ecx, %edx
+; SCALAR-NEXT: xorl %eax, %edx
+; SCALAR-NEXT: leal (,%rdi,4), %eax
+; SCALAR-NEXT: movzbl %al, %r8d
+; SCALAR-NEXT: testb $4, %sil
+; SCALAR-NEXT: cmovel %ecx, %r8d
+; SCALAR-NEXT: leal (,%rdi,8), %eax
+; SCALAR-NEXT: movzbl %al, %eax
+; SCALAR-NEXT: testb $8, %sil
+; SCALAR-NEXT: cmovel %ecx, %eax
+; SCALAR-NEXT: xorl %r8d, %eax
+; SCALAR-NEXT: xorl %edx, %eax
+; SCALAR-NEXT: movl %edi, %edx
+; SCALAR-NEXT: shlb $4, %dl
+; SCALAR-NEXT: movzbl %dl, %edx
+; SCALAR-NEXT: testb $16, %sil
+; SCALAR-NEXT: cmovel %ecx, %edx
+; SCALAR-NEXT: movl %edi, %r8d
+; SCALAR-NEXT: shlb $5, %r8b
+; SCALAR-NEXT: movzbl %r8b, %r8d
+; SCALAR-NEXT: testb $32, %sil
+; SCALAR-NEXT: cmovel %ecx, %r8d
+; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %edx
+; SCALAR-NEXT: shlb $6, %dl
+; SCALAR-NEXT: movzbl %dl, %edx
+; SCALAR-NEXT: testb $64, %sil
+; SCALAR-NEXT: cmovel %ecx, %edx
+; SCALAR-NEXT: xorl %r8d, %edx
+; SCALAR-NEXT: xorl %eax, %edx
+; SCALAR-NEXT: shlb $7, %dil
+; SCALAR-NEXT: movzbl %dil, %eax
+; SCALAR-NEXT: testb $-128, %sil
+; SCALAR-NEXT: cmovel %ecx, %eax
+; SCALAR-NEXT: xorl %edx, %eax
+; SCALAR-NEXT: # kill: def $al killed $al killed $eax
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmul_i8:
@@ -80,66 +79,93 @@ define i8 @clmul_i8(i8 %a, i8 %b) nounwind {
define i16 @clmul_i16(i16 %a, i16 %b) nounwind {
; SCALAR-LABEL: clmul_i16:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $2, %eax
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi
+; SCALAR-NEXT: leal (%rdi,%rdi), %eax
; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $1, %ecx
-; SCALAR-NEXT: imull %edi, %ecx
-; SCALAR-NEXT: xorl %eax, %ecx
+; SCALAR-NEXT: andl $2, %ecx
+; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $4, %eax
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: andl $1, %eax
+; SCALAR-NEXT: cmovnel %edi, %eax
+; SCALAR-NEXT: xorl %ecx, %eax
+; SCALAR-NEXT: leal (,%rdi,4), %ecx
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $8, %edx
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %eax, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $16, %eax
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: andl $4, %edx
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: leal (,%rdi,8), %r8d
; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $32, %ecx
-; SCALAR-NEXT: imull %edi, %ecx
+; SCALAR-NEXT: andl $8, %ecx
+; SCALAR-NEXT: cmovnel %r8d, %ecx
+; SCALAR-NEXT: xorl %edx, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $4, %eax
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $16, %edx
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $5, %eax
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $32, %r8d
+; SCALAR-NEXT: cmovnel %eax, %r8d
+; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %edx
+; SCALAR-NEXT: shll $6, %edx
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $64, %eax
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: cmovnel %edx, %eax
+; SCALAR-NEXT: xorl %r8d, %eax
; SCALAR-NEXT: xorl %ecx, %eax
-; SCALAR-NEXT: xorl %edx, %eax
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $128, %ecx
-; SCALAR-NEXT: imull %edi, %ecx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $7, %ecx
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $256, %edx # imm = 0x100
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
+; SCALAR-NEXT: andl $128, %edx
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $8, %ecx
; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andl $512, %r8d # imm = 0x200
-; SCALAR-NEXT: imull %edi, %r8d
+; SCALAR-NEXT: andl $256, %r8d # imm = 0x100
+; SCALAR-NEXT: cmovnel %ecx, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $9, %ecx
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $512, %edx # imm = 0x200
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: xorl %r8d, %edx
+; SCALAR-NEXT: movl %edi, %r8d
+; SCALAR-NEXT: shll $10, %r8d
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $1024, %ecx # imm = 0x400
-; SCALAR-NEXT: imull %edi, %ecx
-; SCALAR-NEXT: xorl %r8d, %ecx
+; SCALAR-NEXT: cmovnel %r8d, %ecx
+; SCALAR-NEXT: xorl %edx, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $2048, %eax # imm = 0x800
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $11, %eax
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $4096, %edx # imm = 0x1000
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %eax, %edx
+; SCALAR-NEXT: andl $2048, %edx # imm = 0x800
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $12, %eax
; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andl $8192, %r8d # imm = 0x2000
-; SCALAR-NEXT: imull %edi, %r8d
+; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000
+; SCALAR-NEXT: cmovnel %eax, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $13, %eax
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: xorl %r8d, %edx
+; SCALAR-NEXT: movl %edi, %r8d
+; SCALAR-NEXT: shll $14, %r8d
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $16384, %eax # imm = 0x4000
-; SCALAR-NEXT: imull %edi, %eax
-; SCALAR-NEXT: xorl %r8d, %eax
-; SCALAR-NEXT: andl $-32768, %esi # imm = 0x8000
-; SCALAR-NEXT: imull %edi, %esi
+; SCALAR-NEXT: cmovnel %r8d, %eax
+; SCALAR-NEXT: xorl %edx, %eax
+; SCALAR-NEXT: shll $15, %edi
+; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000
+; SCALAR-NEXT: cmovnel %edi, %esi
; SCALAR-NEXT: xorl %esi, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: # kill: def $ax killed $ax killed $eax
@@ -169,130 +195,189 @@ define i16 @clmul_i16(i16 %a, i16 %b) nounwind {
define i32 @clmul_i32(i32 %a, i32 %b) nounwind {
; SCALAR-LABEL: clmul_i32:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $2, %eax
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi
+; SCALAR-NEXT: leal (%rdi,%rdi), %eax
; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $1, %ecx
-; SCALAR-NEXT: imull %edi, %ecx
-; SCALAR-NEXT: xorl %eax, %ecx
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $4, %eax
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: andl $2, %ecx
+; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $8, %edx
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %eax, %edx
+; SCALAR-NEXT: andl $1, %edx
+; SCALAR-NEXT: cmovnel %edi, %edx
; SCALAR-NEXT: xorl %ecx, %edx
+; SCALAR-NEXT: leal (,%rdi,4), %eax
+; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: andl $4, %ecx
+; SCALAR-NEXT: cmovnel %eax, %ecx
+; SCALAR-NEXT: leal (,%rdi,8), %r8d
; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $16, %eax
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: andl $8, %eax
+; SCALAR-NEXT: cmovnel %r8d, %eax
+; SCALAR-NEXT: xorl %ecx, %eax
+; SCALAR-NEXT: xorl %edx, %eax
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $4, %ecx
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $16, %edx
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $5, %ecx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $32, %r8d
-; SCALAR-NEXT: imull %edi, %r8d
-; SCALAR-NEXT: xorl %eax, %r8d
+; SCALAR-NEXT: cmovnel %ecx, %r8d
+; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %edx
+; SCALAR-NEXT: shll $6, %edx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $64, %ecx
-; SCALAR-NEXT: imull %edi, %ecx
+; SCALAR-NEXT: cmovnel %edx, %ecx
; SCALAR-NEXT: xorl %r8d, %ecx
-; SCALAR-NEXT: xorl %edx, %ecx
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $128, %eax
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: xorl %eax, %ecx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $7, %eax
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $256, %edx # imm = 0x100
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %eax, %edx
+; SCALAR-NEXT: andl $128, %edx
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $8, %eax
; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andl $512, %r8d # imm = 0x200
-; SCALAR-NEXT: imull %edi, %r8d
+; SCALAR-NEXT: andl $256, %r8d # imm = 0x100
+; SCALAR-NEXT: cmovnel %eax, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $9, %eax
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $512, %edx # imm = 0x200
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: xorl %r8d, %edx
+; SCALAR-NEXT: movl %edi, %r8d
+; SCALAR-NEXT: shll $10, %r8d
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $1024, %eax # imm = 0x400
-; SCALAR-NEXT: imull %edi, %eax
-; SCALAR-NEXT: xorl %r8d, %eax
+; SCALAR-NEXT: cmovnel %r8d, %eax
+; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: xorl %ecx, %eax
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $2048, %ecx # imm = 0x800
-; SCALAR-NEXT: imull %edi, %ecx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $11, %ecx
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $4096, %edx # imm = 0x1000
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $8192, %ecx # imm = 0x2000
-; SCALAR-NEXT: imull %edi, %ecx
-; SCALAR-NEXT: xorl %edx, %ecx
+; SCALAR-NEXT: andl $2048, %edx # imm = 0x800
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $12, %ecx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000
+; SCALAR-NEXT: cmovnel %ecx, %r8d
+; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $13, %ecx
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $16384, %edx # imm = 0x4000
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
+; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: xorl %r8d, %edx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $14, %ecx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000
+; SCALAR-NEXT: cmovnel %ecx, %r8d
+; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %edx
+; SCALAR-NEXT: shll $15, %edx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $32768, %ecx # imm = 0x8000
-; SCALAR-NEXT: imull %edi, %ecx
-; SCALAR-NEXT: xorl %edx, %ecx
+; SCALAR-NEXT: cmovnel %edx, %ecx
+; SCALAR-NEXT: xorl %r8d, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $65536, %eax # imm = 0x10000
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $16, %eax
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $131072, %edx # imm = 0x20000
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %eax, %edx
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $262144, %eax # imm = 0x40000
-; SCALAR-NEXT: imull %edi, %eax
-; SCALAR-NEXT: xorl %edx, %eax
+; SCALAR-NEXT: andl $65536, %edx # imm = 0x10000
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $17, %eax
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000
+; SCALAR-NEXT: cmovnel %eax, %r8d
+; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $18, %eax
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $524288, %edx # imm = 0x80000
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %eax, %edx
+; SCALAR-NEXT: andl $262144, %edx # imm = 0x40000
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: xorl %r8d, %edx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $19, %eax
; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andl $1048576, %r8d # imm = 0x100000
-; SCALAR-NEXT: imull %edi, %r8d
+; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000
+; SCALAR-NEXT: cmovnel %eax, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $20, %eax
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $1048576, %edx # imm = 0x100000
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: xorl %r8d, %edx
+; SCALAR-NEXT: movl %edi, %r8d
+; SCALAR-NEXT: shll $21, %r8d
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $2097152, %eax # imm = 0x200000
-; SCALAR-NEXT: imull %edi, %eax
-; SCALAR-NEXT: xorl %r8d, %eax
+; SCALAR-NEXT: cmovnel %r8d, %eax
+; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: xorl %ecx, %eax
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $4194304, %ecx # imm = 0x400000
-; SCALAR-NEXT: imull %edi, %ecx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $22, %ecx
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $8388608, %edx # imm = 0x800000
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $16777216, %ecx # imm = 0x1000000
-; SCALAR-NEXT: imull %edi, %ecx
-; SCALAR-NEXT: xorl %edx, %ecx
+; SCALAR-NEXT: andl $4194304, %edx # imm = 0x400000
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $23, %ecx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000
+; SCALAR-NEXT: cmovnel %ecx, %r8d
+; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $24, %ecx
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $33554432, %edx # imm = 0x2000000
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $67108864, %ecx # imm = 0x4000000
-; SCALAR-NEXT: imull %edi, %ecx
-; SCALAR-NEXT: xorl %edx, %ecx
+; SCALAR-NEXT: andl $16777216, %edx # imm = 0x1000000
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: xorl %r8d, %edx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $25, %ecx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000
+; SCALAR-NEXT: cmovnel %ecx, %r8d
+; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $26, %ecx
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $134217728, %edx # imm = 0x8000000
-; SCALAR-NEXT: imull %edi, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
+; SCALAR-NEXT: andl $67108864, %edx # imm = 0x4000000
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: xorl %r8d, %edx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $27, %ecx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000
+; SCALAR-NEXT: cmovnel %ecx, %r8d
+; SCALAR-NEXT: xorl %edx, %r8d
+; SCALAR-NEXT: movl %edi, %edx
+; SCALAR-NEXT: shll $28, %edx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $268435456, %ecx # imm = 0x10000000
-; SCALAR-NEXT: imull %edi, %ecx
-; SCALAR-NEXT: xorl %edx, %ecx
+; SCALAR-NEXT: cmovnel %edx, %ecx
+; SCALAR-NEXT: xorl %r8d, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $29, %eax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $536870912, %edx # imm = 0x20000000
-; SCALAR-NEXT: imull %edi, %edx
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: movl %edi, %r8d
+; SCALAR-NEXT: shll $30, %r8d
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $1073741824, %eax # imm = 0x40000000
-; SCALAR-NEXT: imull %edi, %eax
+; SCALAR-NEXT: cmovnel %r8d, %eax
; SCALAR-NEXT: xorl %edx, %eax
+; SCALAR-NEXT: shll $31, %edi
; SCALAR-NEXT: andl $-2147483648, %esi # imm = 0x80000000
-; SCALAR-NEXT: imull %edi, %esi
+; SCALAR-NEXT: cmovnel %edi, %esi
; SCALAR-NEXT: xorl %esi, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: retq
@@ -321,324 +406,355 @@ define i32 @clmul_i32(i32 %a, i32 %b) nounwind {
define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
; SCALAR-LABEL: clmul_i64:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: pushq %rbp
-; SCALAR-NEXT: pushq %r15
-; SCALAR-NEXT: pushq %r14
-; SCALAR-NEXT: pushq %r13
-; SCALAR-NEXT: pushq %r12
-; SCALAR-NEXT: pushq %rbx
-; SCALAR-NEXT: subq $40, %rsp
-; SCALAR-NEXT: movl %esi, %r10d
-; SCALAR-NEXT: movl %esi, %r15d
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: movl %esi, %r13d
-; SCALAR-NEXT: movl %esi, %r12d
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: movl %esi, %ebp
-; SCALAR-NEXT: movl %esi, %r9d
+; SCALAR-NEXT: leaq (%rdi,%rdi), %rax
; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: andl $2, %ecx
+; SCALAR-NEXT: cmovneq %rax, %rcx
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: movl %esi, %r11d
-; SCALAR-NEXT: movl %esi, %ebx
-; SCALAR-NEXT: movl %esi, %r14d
-; SCALAR-NEXT: andl $2, %r14d
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: andl $1, %r10d
-; SCALAR-NEXT: imulq %rdi, %r10
-; SCALAR-NEXT: xorq %r14, %r10
-; SCALAR-NEXT: movl %esi, %r14d
-; SCALAR-NEXT: andl $4, %r15d
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: andl $8, %eax
-; SCALAR-NEXT: imulq %rdi, %rax
-; SCALAR-NEXT: xorq %r15, %rax
-; SCALAR-NEXT: movl %esi, %r15d
-; SCALAR-NEXT: xorq %r10, %rax
-; SCALAR-NEXT: movl %esi, %r10d
-; SCALAR-NEXT: andl $16, %r13d
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: andl $32, %r12d
-; SCALAR-NEXT: imulq %rdi, %r12
-; SCALAR-NEXT: xorq %r13, %r12
-; SCALAR-NEXT: movl %esi, %r13d
-; SCALAR-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $64, %r8d
-; SCALAR-NEXT: imulq %rdi, %r8
-; SCALAR-NEXT: xorq %r12, %r8
-; SCALAR-NEXT: xorq %rax, %r8
-; SCALAR-NEXT: movl %esi, %r12d
-; SCALAR-NEXT: andl $128, %ebp
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: andl $256, %r9d # imm = 0x100
-; SCALAR-NEXT: imulq %rdi, %r9
-; SCALAR-NEXT: xorq %rbp, %r9
-; SCALAR-NEXT: movl %esi, %ebp
-; SCALAR-NEXT: andl $512, %ecx # imm = 0x200
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: xorq %r9, %rcx
-; SCALAR-NEXT: movl %esi, %r9d
-; SCALAR-NEXT: andl $1024, %edx # imm = 0x400
-; SCALAR-NEXT: imulq %rdi, %rdx
+; SCALAR-NEXT: andl $1, %edx
+; SCALAR-NEXT: cmovneq %rdi, %rdx
; SCALAR-NEXT: xorq %rcx, %rdx
+; SCALAR-NEXT: leaq (,%rdi,4), %rax
+; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: andl $4, %ecx
+; SCALAR-NEXT: cmovneq %rax, %rcx
+; SCALAR-NEXT: leaq (,%rdi,8), %r8
; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: andl $8, %eax
+; SCALAR-NEXT: cmovneq %r8, %rax
+; SCALAR-NEXT: xorq %rcx, %rax
+; SCALAR-NEXT: xorq %rdx, %rax
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $4, %rcx
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $16, %edx
+; SCALAR-NEXT: cmovneq %rcx, %rdx
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $5, %rcx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $32, %r8d
+; SCALAR-NEXT: cmovneq %rcx, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $6, %rdx
; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $2048, %r11d # imm = 0x800
-; SCALAR-NEXT: imulq %rdi, %r11
-; SCALAR-NEXT: andl $4096, %ebx # imm = 0x1000
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: xorq %r11, %rbx
-; SCALAR-NEXT: movl %esi, %r11d
-; SCALAR-NEXT: andl $8192, %r14d # imm = 0x2000
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: xorq %rbx, %r14
-; SCALAR-NEXT: movl %esi, %ebx
-; SCALAR-NEXT: andl $16384, %r15d # imm = 0x4000
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: xorq %r14, %r15
+; SCALAR-NEXT: andl $64, %ecx
+; SCALAR-NEXT: cmovneq %rdx, %rcx
+; SCALAR-NEXT: xorq %r8, %rcx
+; SCALAR-NEXT: xorq %rax, %rcx
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $7, %rax
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $128, %edx
+; SCALAR-NEXT: cmovneq %rax, %rdx
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $8, %rax
; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andl $32768, %r10d # imm = 0x8000
-; SCALAR-NEXT: imulq %rdi, %r10
-; SCALAR-NEXT: xorq %r15, %r10
-; SCALAR-NEXT: movl %esi, %r14d
-; SCALAR-NEXT: xorq %rdx, %r10
+; SCALAR-NEXT: andl $256, %r8d # imm = 0x100
+; SCALAR-NEXT: cmovneq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $9, %rax
; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; SCALAR-NEXT: andl $65536, %ecx # imm = 0x10000
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: andl $131072, %r13d # imm = 0x20000
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rcx, %r13
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $262144, %r12d # imm = 0x40000
-; SCALAR-NEXT: imulq %rdi, %r12
-; SCALAR-NEXT: xorq %r13, %r12
-; SCALAR-NEXT: movl %esi, %r15d
-; SCALAR-NEXT: andl $524288, %ebp # imm = 0x80000
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %r12, %rbp
-; SCALAR-NEXT: movl %esi, %r12d
-; SCALAR-NEXT: andl $1048576, %r9d # imm = 0x100000
-; SCALAR-NEXT: imulq %rdi, %r9
-; SCALAR-NEXT: xorq %rbp, %r9
-; SCALAR-NEXT: movabsq $4294967296, %rbp # imm = 0x100000000
-; SCALAR-NEXT: andq %rsi, %rbp
-; SCALAR-NEXT: andl $2097152, %eax # imm = 0x200000
-; SCALAR-NEXT: imulq %rdi, %rax
-; SCALAR-NEXT: xorq %r9, %rax
-; SCALAR-NEXT: movabsq $8589934592, %r9 # imm = 0x200000000
-; SCALAR-NEXT: andq %rsi, %r9
-; SCALAR-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: xorq %r10, %rax
-; SCALAR-NEXT: movabsq $17179869184, %r9 # imm = 0x400000000
-; SCALAR-NEXT: andq %rsi, %r9
-; SCALAR-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; SCALAR-NEXT: andl $4194304, %r9d # imm = 0x400000
-; SCALAR-NEXT: imulq %rdi, %r9
-; SCALAR-NEXT: andl $8388608, %r11d # imm = 0x800000
-; SCALAR-NEXT: imulq %rdi, %r11
-; SCALAR-NEXT: xorq %r9, %r11
-; SCALAR-NEXT: movabsq $34359738368, %r9 # imm = 0x800000000
-; SCALAR-NEXT: andq %rsi, %r9
-; SCALAR-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $16777216, %ebx # imm = 0x1000000
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: xorq %r11, %rbx
-; SCALAR-NEXT: movabsq $68719476736, %r9 # imm = 0x1000000000
-; SCALAR-NEXT: andq %rsi, %r9
-; SCALAR-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000
-; SCALAR-NEXT: imulq %rdi, %r8
-; SCALAR-NEXT: xorq %rbx, %r8
-; SCALAR-NEXT: movabsq $137438953472, %r9 # imm = 0x2000000000
-; SCALAR-NEXT: andq %rsi, %r9
-; SCALAR-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $67108864, %r14d # imm = 0x4000000
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: xorq %r8, %r14
-; SCALAR-NEXT: movabsq $274877906944, %r8 # imm = 0x4000000000
-; SCALAR-NEXT: andq %rsi, %r8
-; SCALAR-NEXT: movq %r8, (%rsp) # 8-byte Spill
-; SCALAR-NEXT: andl $134217728, %edx # imm = 0x8000000
-; SCALAR-NEXT: imulq %rdi, %rdx
-; SCALAR-NEXT: xorq %r14, %rdx
-; SCALAR-NEXT: movabsq $549755813888, %r8 # imm = 0x8000000000
-; SCALAR-NEXT: andq %rsi, %r8
-; SCALAR-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movq %rcx, %r13
-; SCALAR-NEXT: andl $268435456, %r13d # imm = 0x10000000
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rdx, %r13
-; SCALAR-NEXT: movabsq $1099511627776, %rcx # imm = 0x10000000000
-; SCALAR-NEXT: andq %rsi, %rcx
-; SCALAR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: xorq %rax, %r13
-; SCALAR-NEXT: movabsq $2199023255552, %rax # imm = 0x20000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $536870912, %r15d # imm = 0x20000000
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: andl $1073741824, %r12d # imm = 0x40000000
-; SCALAR-NEXT: imulq %rdi, %r12
-; SCALAR-NEXT: xorq %r15, %r12
-; SCALAR-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $4398046511104, %rax # imm = 0x40000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $8796093022208, %rax # imm = 0x80000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $17592186044416, %rax # imm = 0x100000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $35184372088832, %rax # imm = 0x200000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $70368744177664, %rax # imm = 0x400000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $140737488355328, %rax # imm = 0x800000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $281474976710656, %rax # imm = 0x1000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $562949953421312, %rax # imm = 0x2000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $1125899906842624, %rax # imm = 0x4000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $2251799813685248, %rax # imm = 0x8000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $4503599627370496, %r12 # imm = 0x10000000000000
-; SCALAR-NEXT: andq %rsi, %r12
-; SCALAR-NEXT: movabsq $9007199254740992, %r14 # imm = 0x20000000000000
-; SCALAR-NEXT: andq %rsi, %r14
-; SCALAR-NEXT: movabsq $18014398509481984, %rax # imm = 0x40000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $36028797018963968, %r15 # imm = 0x80000000000000
-; SCALAR-NEXT: andq %rsi, %r15
-; SCALAR-NEXT: movabsq $72057594037927936, %rbx # imm = 0x100000000000000
-; SCALAR-NEXT: andq %rsi, %rbx
-; SCALAR-NEXT: movabsq $144115188075855872, %r11 # imm = 0x200000000000000
-; SCALAR-NEXT: andq %rsi, %r11
-; SCALAR-NEXT: movabsq $288230376151711744, %r10 # imm = 0x400000000000000
-; SCALAR-NEXT: andq %rsi, %r10
-; SCALAR-NEXT: movabsq $576460752303423488, %r9 # imm = 0x800000000000000
-; SCALAR-NEXT: andq %rsi, %r9
-; SCALAR-NEXT: movabsq $1152921504606846976, %r8 # imm = 0x1000000000000000
-; SCALAR-NEXT: andq %rsi, %r8
-; SCALAR-NEXT: movabsq $2305843009213693952, %rdx # imm = 0x2000000000000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movabsq $4611686018427387904, %rcx # imm = 0x4000000000000000
-; SCALAR-NEXT: andq %rsi, %rcx
-; SCALAR-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: # kill: def $esi killed $esi killed $rsi def $rsi
-; SCALAR-NEXT: andl $-2147483648, %esi # imm = 0x80000000
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: xorq %r13, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: movq (%rsp), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: xorq %rsi, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: imulq %rdi, %r12
-; SCALAR-NEXT: xorq %rsi, %r12
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: xorq %r12, %r14
-; SCALAR-NEXT: xorq %r13, %r14
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: xorq %rsi, %r15
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: xorq %r15, %rbx
-; SCALAR-NEXT: imulq %rdi, %r11
-; SCALAR-NEXT: xorq %rbx, %r11
-; SCALAR-NEXT: imulq %rdi, %r10
-; SCALAR-NEXT: xorq %r11, %r10
-; SCALAR-NEXT: imulq %rdi, %r9
-; SCALAR-NEXT: xorq %r10, %r9
-; SCALAR-NEXT: imulq %rdi, %r8
-; SCALAR-NEXT: xorq %r9, %r8
-; SCALAR-NEXT: imulq %rdi, %rdx
+; SCALAR-NEXT: andl $512, %edx # imm = 0x200
+; SCALAR-NEXT: cmovneq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: xorq %rdx, %rcx
-; SCALAR-NEXT: imulq %rdi, %rax
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $10, %r8
+; SCALAR-NEXT: movl %esi, %eax
+; SCALAR-NEXT: andl $1024, %eax # imm = 0x400
+; SCALAR-NEXT: cmovneq %r8, %rax
+; SCALAR-NEXT: xorq %rdx, %rax
; SCALAR-NEXT: xorq %rcx, %rax
-; SCALAR-NEXT: xorq %r14, %rax
-; SCALAR-NEXT: addq $40, %rsp
-; SCALAR-NEXT: popq %rbx
-; SCALAR-NEXT: popq %r12
-; SCALAR-NEXT: popq %r13
-; SCALAR-NEXT: popq %r14
-; SCALAR-NEXT: popq %r15
-; SCALAR-NEXT: popq %rbp
-; SCALAR-NEXT: retq
-;
-; SSE-PCLMUL-LABEL: clmul_i64:
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $11, %rcx
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $2048, %edx # imm = 0x800
+; SCALAR-NEXT: cmovneq %rcx, %rdx
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $12, %rcx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000
+; SCALAR-NEXT: cmovneq %rcx, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $13, %rcx
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000
+; SCALAR-NEXT: cmovneq %rcx, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $14, %rcx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000
+; SCALAR-NEXT: cmovneq %rcx, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $15, %rdx
+; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: andl $32768, %ecx # imm = 0x8000
+; SCALAR-NEXT: cmovneq %rdx, %rcx
+; SCALAR-NEXT: xorq %r8, %rcx
+; SCALAR-NEXT: xorq %rax, %rcx
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $16, %rax
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $65536, %edx # imm = 0x10000
+; SCALAR-NEXT: cmovneq %rax, %rdx
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $17, %rax
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000
+; SCALAR-NEXT: cmovneq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $18, %rax
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $262144, %edx # imm = 0x40000
+; SCALAR-NEXT: cmovneq %rax, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $19, %rax
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000
+; SCALAR-NEXT: cmovneq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $20, %rax
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $1048576, %edx # imm = 0x100000
+; SCALAR-NEXT: cmovneq %rax, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $21, %r8
+; SCALAR-NEXT: movl %esi, %eax
+; SCALAR-NEXT: andl $2097152, %eax # imm = 0x200000
+; SCALAR-NEXT: cmovneq %r8, %rax
+; SCALAR-NEXT: xorq %rdx, %rax
+; SCALAR-NEXT: xorq %rcx, %rax
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $22, %rcx
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $4194304, %edx # imm = 0x400000
+; SCALAR-NEXT: cmovneq %rcx, %rdx
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $23, %rcx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000
+; SCALAR-NEXT: cmovneq %rcx, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $24, %rcx
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $16777216, %edx # imm = 0x1000000
+; SCALAR-NEXT: cmovneq %rcx, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $25, %rcx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000
+; SCALAR-NEXT: cmovneq %rcx, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $26, %rcx
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $67108864, %edx # imm = 0x4000000
+; SCALAR-NEXT: cmovneq %rcx, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $27, %rcx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000
+; SCALAR-NEXT: cmovneq %rcx, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $28, %rdx
+; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: andl $268435456, %ecx # imm = 0x10000000
+; SCALAR-NEXT: cmovneq %rdx, %rcx
+; SCALAR-NEXT: xorq %r8, %rcx
+; SCALAR-NEXT: xorq %rax, %rcx
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $29, %rax
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $536870912, %edx # imm = 0x20000000
+; SCALAR-NEXT: cmovneq %rax, %rdx
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $30, %rax
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: andl $1073741824, %r8d # imm = 0x40000000
+; SCALAR-NEXT: cmovneq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rax
+; SCALAR-NEXT: shlq $31, %rax
+; SCALAR-NEXT: movl %esi, %edx
+; SCALAR-NEXT: andl $-2147483648, %edx # imm = 0x80000000
+; SCALAR-NEXT: cmovneq %rax, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $32, %r8
+; SCALAR-NEXT: xorl %eax, %eax
+; SCALAR-NEXT: btq $32, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $33, %rdx
+; SCALAR-NEXT: btq $33, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $34, %r8
+; SCALAR-NEXT: btq $34, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %r9
+; SCALAR-NEXT: shlq $35, %r9
+; SCALAR-NEXT: btq $35, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r9
+; SCALAR-NEXT: xorq %r8, %r9
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $36, %rdx
+; SCALAR-NEXT: btq $36, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rdx
+; SCALAR-NEXT: xorq %r9, %rdx
+; SCALAR-NEXT: xorq %rcx, %rdx
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $37, %rcx
+; SCALAR-NEXT: btq $37, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rcx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $38, %r8
+; SCALAR-NEXT: btq $38, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rcx, %r8
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $39, %rcx
+; SCALAR-NEXT: btq $39, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rcx
+; SCALAR-NEXT: xorq %r8, %rcx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $40, %r8
+; SCALAR-NEXT: btq $40, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rcx, %r8
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $41, %rcx
+; SCALAR-NEXT: btq $41, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rcx
+; SCALAR-NEXT: xorq %r8, %rcx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $42, %r8
+; SCALAR-NEXT: btq $42, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rcx, %r8
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $43, %rcx
+; SCALAR-NEXT: btq $43, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rcx
+; SCALAR-NEXT: xorq %r8, %rcx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $44, %r8
+; SCALAR-NEXT: btq $44, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rcx, %r8
+; SCALAR-NEXT: movq %rdi, %r9
+; SCALAR-NEXT: shlq $45, %r9
+; SCALAR-NEXT: btq $45, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r9
+; SCALAR-NEXT: xorq %r8, %r9
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $46, %rcx
+; SCALAR-NEXT: btq $46, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rcx
+; SCALAR-NEXT: xorq %r9, %rcx
+; SCALAR-NEXT: xorq %rdx, %rcx
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $47, %rdx
+; SCALAR-NEXT: btq $47, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rdx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $48, %r8
+; SCALAR-NEXT: btq $48, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $49, %rdx
+; SCALAR-NEXT: btq $49, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $50, %r8
+; SCALAR-NEXT: btq $50, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $51, %rdx
+; SCALAR-NEXT: btq $51, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $52, %r8
+; SCALAR-NEXT: btq $52, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $53, %rdx
+; SCALAR-NEXT: btq $53, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $54, %r8
+; SCALAR-NEXT: btq $54, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $55, %rdx
+; SCALAR-NEXT: btq $55, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $56, %r8
+; SCALAR-NEXT: btq $56, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rdx, %r8
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: shlq $57, %rdx
+; SCALAR-NEXT: btq $57, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rdx
+; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: xorq %rcx, %rdx
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $58, %rcx
+; SCALAR-NEXT: btq $58, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rcx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $59, %r8
+; SCALAR-NEXT: btq $59, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rcx, %r8
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $60, %rcx
+; SCALAR-NEXT: btq $60, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rcx
+; SCALAR-NEXT: xorq %r8, %rcx
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: shlq $61, %r8
+; SCALAR-NEXT: btq $61, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %r8
+; SCALAR-NEXT: xorq %rcx, %r8
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: shlq $62, %rcx
+; SCALAR-NEXT: btq $62, %rsi
+; SCALAR-NEXT: cmovaeq %rax, %rcx
+; SCALAR-NEXT: xorq %r8, %rcx
+; SCALAR-NEXT: shlq $63, %rdi
+; SCALAR-NEXT: btq $63, %rsi
+; SCALAR-NEXT: cmovbq %rdi, %rax
+; SCALAR-NEXT: xorq %rcx, %rax
+; SCALAR-NEXT: xorq %rdx, %rax
+; SCALAR-NEXT: retq
+;
+; SSE-PCLMUL-LABEL: clmul_i64:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movq %rsi, %xmm0
; SSE-PCLMUL-NEXT: movq %rdi, %xmm1
@@ -660,39 +776,87 @@ define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
define i8 @clmulr_i8(i8 %a, i8 %b) nounwind {
; SCALAR-LABEL: clmulr_i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $2, %ecx
-; SCALAR-NEXT: movzbl %dil, %eax
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $1, %edx
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $4, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
+; SCALAR-NEXT: pushq %rbp
+; SCALAR-NEXT: pushq %r15
+; SCALAR-NEXT: pushq %r14
+; SCALAR-NEXT: pushq %rbx
+; SCALAR-NEXT: movzbl %dil, %ecx
+; SCALAR-NEXT: movl %ecx, %r11d
+; SCALAR-NEXT: shll $8, %r11d
+; SCALAR-NEXT: movl %ecx, %r10d
+; SCALAR-NEXT: shll $9, %r10d
+; SCALAR-NEXT: movl %ecx, %r9d
+; SCALAR-NEXT: shll $10, %r9d
+; SCALAR-NEXT: movl %ecx, %eax
+; SCALAR-NEXT: shll $11, %eax
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: shll $12, %r8d
+; SCALAR-NEXT: movl %edi, %edx
+; SCALAR-NEXT: shll $13, %edx
+; SCALAR-NEXT: xorl %ebx, %ebx
+; SCALAR-NEXT: testw %bx, %bx
+; SCALAR-NEXT: cmovel %ebx, %edx
+; SCALAR-NEXT: cmovel %ebx, %r8d
+; SCALAR-NEXT: cmovel %ebx, %eax
+; SCALAR-NEXT: cmovel %ebx, %r9d
+; SCALAR-NEXT: cmovel %ebx, %r10d
+; SCALAR-NEXT: cmovel %ebx, %r11d
+; SCALAR-NEXT: shll $14, %edi
+; SCALAR-NEXT: testw %bx, %bx
+; SCALAR-NEXT: cmovnel %edi, %ebx
+; SCALAR-NEXT: movl %esi, %edi
+; SCALAR-NEXT: andl $1, %edi
+; SCALAR-NEXT: cmovnel %ecx, %edi
+; SCALAR-NEXT: leal (%rcx,%rcx), %ebp
+; SCALAR-NEXT: movl %esi, %r14d
+; SCALAR-NEXT: andl $2, %r14d
+; SCALAR-NEXT: cmovnel %ebp, %r14d
+; SCALAR-NEXT: xorl %edi, %r14d
+; SCALAR-NEXT: leal (,%rcx,4), %edi
+; SCALAR-NEXT: movl %esi, %ebp
+; SCALAR-NEXT: andl $4, %ebp
+; SCALAR-NEXT: cmovnel %edi, %ebp
+; SCALAR-NEXT: leal (,%rcx,8), %r15d
; SCALAR-NEXT: movl %esi, %edi
; SCALAR-NEXT: andl $8, %edi
-; SCALAR-NEXT: imull %eax, %edi
-; SCALAR-NEXT: xorl %ecx, %edi
-; SCALAR-NEXT: xorl %edx, %edi
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $16, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $32, %edx
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $64, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: xorl %edx, %ecx
-; SCALAR-NEXT: xorl %edi, %ecx
+; SCALAR-NEXT: cmovnel %r15d, %edi
+; SCALAR-NEXT: xorl %ebp, %edi
+; SCALAR-NEXT: xorl %r14d, %edi
+; SCALAR-NEXT: movl %ecx, %ebp
+; SCALAR-NEXT: shll $4, %ebp
+; SCALAR-NEXT: movl %esi, %r14d
+; SCALAR-NEXT: andl $16, %r14d
+; SCALAR-NEXT: cmovnel %ebp, %r14d
+; SCALAR-NEXT: movl %ecx, %ebp
+; SCALAR-NEXT: shll $5, %ebp
+; SCALAR-NEXT: movl %esi, %r15d
+; SCALAR-NEXT: andl $32, %r15d
+; SCALAR-NEXT: cmovnel %ebp, %r15d
+; SCALAR-NEXT: xorl %r14d, %r15d
+; SCALAR-NEXT: movl %ecx, %ebp
+; SCALAR-NEXT: shll $6, %ebp
+; SCALAR-NEXT: movl %esi, %r14d
+; SCALAR-NEXT: andl $64, %r14d
+; SCALAR-NEXT: cmovnel %ebp, %r14d
+; SCALAR-NEXT: xorl %r15d, %r14d
+; SCALAR-NEXT: xorl %edi, %r14d
+; SCALAR-NEXT: shll $7, %ecx
; SCALAR-NEXT: andl $128, %esi
-; SCALAR-NEXT: imull %esi, %eax
+; SCALAR-NEXT: cmovel %esi, %ecx
+; SCALAR-NEXT: xorl %r11d, %ecx
+; SCALAR-NEXT: xorl %r10d, %ecx
+; SCALAR-NEXT: xorl %r9d, %ecx
+; SCALAR-NEXT: xorl %r14d, %ecx
+; SCALAR-NEXT: xorl %r8d, %eax
+; SCALAR-NEXT: xorl %edx, %eax
+; SCALAR-NEXT: xorl %ebx, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: shrl $7, %eax
; SCALAR-NEXT: # kill: def $al killed $al killed $eax
+; SCALAR-NEXT: popq %rbx
+; SCALAR-NEXT: popq %r14
+; SCALAR-NEXT: popq %r15
+; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmulr_i8:
@@ -729,71 +893,194 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind {
define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
; SCALAR-LABEL: clmulr_i16:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: pushq %rbp
+; SCALAR-NEXT: pushq %r15
+; SCALAR-NEXT: pushq %r14
+; SCALAR-NEXT: pushq %r13
+; SCALAR-NEXT: pushq %r12
+; SCALAR-NEXT: pushq %rbx
+; SCALAR-NEXT: movl %esi, %r10d
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: movl %edi, %r13d
+; SCALAR-NEXT: movl %edi, %r12d
+; SCALAR-NEXT: movl %edi, %r15d
+; SCALAR-NEXT: movl %edi, %r9d
+; SCALAR-NEXT: movl %edi, %ebp
+; SCALAR-NEXT: movl %edi, %ebx
+; SCALAR-NEXT: movl %edi, %r11d
+; SCALAR-NEXT: movl %edi, %r8d
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: movl %edi, %esi
+; SCALAR-NEXT: movl %edi, %edx
+; SCALAR-NEXT: movl %edi, %r14d
+; SCALAR-NEXT: shll $16, %r14d
+; SCALAR-NEXT: shll $17, %ecx
+; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: shll $18, %r13d
+; SCALAR-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: shll $19, %r12d
+; SCALAR-NEXT: movl %r12d, %r13d
+; SCALAR-NEXT: shll $20, %r15d
+; SCALAR-NEXT: movl %r15d, %r12d
+; SCALAR-NEXT: shll $21, %r9d
+; SCALAR-NEXT: movl %r9d, %r15d
+; SCALAR-NEXT: shll $22, %ebp
+; SCALAR-NEXT: shll $23, %ebx
+; SCALAR-NEXT: movl %ebx, %r9d
+; SCALAR-NEXT: shll $24, %r11d
+; SCALAR-NEXT: movl %r11d, %ebx
+; SCALAR-NEXT: shll $25, %r8d
+; SCALAR-NEXT: movl %r8d, %r11d
+; SCALAR-NEXT: shll $26, %eax
+; SCALAR-NEXT: movl %eax, %r8d
+; SCALAR-NEXT: shll $27, %esi
+; SCALAR-NEXT: shll $28, %edx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $29, %ecx
+; SCALAR-NEXT: xorl %eax, %eax
+; SCALAR-NEXT: testl $0, %eax
+; SCALAR-NEXT: cmovel %eax, %ecx
+; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %edx
+; SCALAR-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %esi
+; SCALAR-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %r8d
+; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %r11d
+; SCALAR-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %ebx
+; SCALAR-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %r9d
+; SCALAR-NEXT: cmovel %eax, %ebp
+; SCALAR-NEXT: cmovel %eax, %r15d
+; SCALAR-NEXT: movl %r15d, %r8d
+; SCALAR-NEXT: cmovel %eax, %r12d
+; SCALAR-NEXT: movl %r12d, %r15d
+; SCALAR-NEXT: cmovel %eax, %r13d
+; SCALAR-NEXT: movl %r13d, %r12d
+; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; SCALAR-NEXT: cmovel %eax, %ecx
+; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Reload
+; SCALAR-NEXT: cmovel %eax, %r13d
+; SCALAR-NEXT: cmovel %eax, %r14d
+; SCALAR-NEXT: movl %edi, %r11d
+; SCALAR-NEXT: shll $30, %r11d
+; SCALAR-NEXT: testl $0, %eax
+; SCALAR-NEXT: cmovel %eax, %r11d
+; SCALAR-NEXT: movzwl %di, %edi
+; SCALAR-NEXT: movl %r10d, %eax
+; SCALAR-NEXT: andl $1, %eax
+; SCALAR-NEXT: cmovnel %edi, %eax
+; SCALAR-NEXT: movl %r10d, %ecx
; SCALAR-NEXT: andl $2, %ecx
-; SCALAR-NEXT: movzwl %di, %eax
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $1, %edx
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $4, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %edi
-; SCALAR-NEXT: andl $8, %edi
-; SCALAR-NEXT: imull %eax, %edi
-; SCALAR-NEXT: xorl %ecx, %edi
-; SCALAR-NEXT: xorl %edx, %edi
-; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: leal (%rdi,%rdi), %esi
+; SCALAR-NEXT: cmovnel %esi, %ecx
+; SCALAR-NEXT: xorl %eax, %ecx
+; SCALAR-NEXT: movl %r10d, %eax
+; SCALAR-NEXT: andl $4, %eax
+; SCALAR-NEXT: leal (,%rdi,4), %esi
+; SCALAR-NEXT: cmovnel %esi, %eax
+; SCALAR-NEXT: movl %r10d, %esi
+; SCALAR-NEXT: andl $8, %esi
+; SCALAR-NEXT: leal (,%rdi,8), %ebx
+; SCALAR-NEXT: cmovnel %ebx, %esi
+; SCALAR-NEXT: xorl %eax, %esi
+; SCALAR-NEXT: xorl %ecx, %esi
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $4, %eax
+; SCALAR-NEXT: movl %r10d, %ecx
; SCALAR-NEXT: andl $16, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andl $32, %r8d
-; SCALAR-NEXT: imull %eax, %r8d
-; SCALAR-NEXT: xorl %ecx, %r8d
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $64, %edx
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: xorl %r8d, %edx
-; SCALAR-NEXT: xorl %edi, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $128, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %edi
-; SCALAR-NEXT: andl $256, %edi # imm = 0x100
-; SCALAR-NEXT: imull %eax, %edi
-; SCALAR-NEXT: xorl %ecx, %edi
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andl $512, %r8d # imm = 0x200
-; SCALAR-NEXT: imull %eax, %r8d
-; SCALAR-NEXT: xorl %edi, %r8d
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $1024, %ecx # imm = 0x400
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: xorl %r8d, %ecx
+; SCALAR-NEXT: cmovnel %eax, %ecx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $5, %eax
+; SCALAR-NEXT: movl %r10d, %ebx
+; SCALAR-NEXT: andl $32, %ebx
+; SCALAR-NEXT: cmovnel %eax, %ebx
+; SCALAR-NEXT: xorl %ecx, %ebx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $6, %ecx
+; SCALAR-NEXT: movl %r10d, %eax
+; SCALAR-NEXT: andl $64, %eax
+; SCALAR-NEXT: cmovnel %ecx, %eax
+; SCALAR-NEXT: xorl %ebx, %eax
+; SCALAR-NEXT: xorl %esi, %eax
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $7, %ecx
+; SCALAR-NEXT: movl %r10d, %esi
+; SCALAR-NEXT: andl $128, %esi
+; SCALAR-NEXT: cmovnel %ecx, %esi
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $8, %ecx
+; SCALAR-NEXT: movl %r10d, %ebx
+; SCALAR-NEXT: andl $256, %ebx # imm = 0x100
+; SCALAR-NEXT: cmovnel %ecx, %ebx
+; SCALAR-NEXT: xorl %esi, %ebx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $9, %ecx
+; SCALAR-NEXT: movl %r10d, %edx
+; SCALAR-NEXT: andl $512, %edx # imm = 0x200
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: xorl %ebx, %edx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $10, %ecx
+; SCALAR-NEXT: movl %r10d, %esi
+; SCALAR-NEXT: andl $1024, %esi # imm = 0x400
+; SCALAR-NEXT: cmovnel %ecx, %esi
+; SCALAR-NEXT: xorl %edx, %esi
+; SCALAR-NEXT: xorl %eax, %esi
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $11, %eax
+; SCALAR-NEXT: movl %r10d, %ecx
+; SCALAR-NEXT: andl $2048, %ecx # imm = 0x800
+; SCALAR-NEXT: cmovnel %eax, %ecx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $12, %eax
+; SCALAR-NEXT: movl %r10d, %edx
+; SCALAR-NEXT: andl $4096, %edx # imm = 0x1000
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: xorl %ecx, %edx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $13, %eax
+; SCALAR-NEXT: movl %r10d, %ecx
+; SCALAR-NEXT: andl $8192, %ecx # imm = 0x2000
+; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: xorl %edx, %ecx
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $2048, %edx # imm = 0x800
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: movl %esi, %edi
-; SCALAR-NEXT: andl $4096, %edi # imm = 0x1000
-; SCALAR-NEXT: imull %eax, %edi
-; SCALAR-NEXT: xorl %edx, %edi
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: xorl %edi, %edx
-; SCALAR-NEXT: movl %esi, %edi
-; SCALAR-NEXT: andl $16384, %edi # imm = 0x4000
-; SCALAR-NEXT: imull %eax, %edi
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $14, %eax
+; SCALAR-NEXT: movl %r10d, %edx
+; SCALAR-NEXT: andl $16384, %edx # imm = 0x4000
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: xorl %ecx, %edx
+; SCALAR-NEXT: shll $15, %edi
+; SCALAR-NEXT: andl $32768, %r10d # imm = 0x8000
+; SCALAR-NEXT: cmovel %r10d, %edi
; SCALAR-NEXT: xorl %edx, %edi
-; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000
-; SCALAR-NEXT: imull %esi, %eax
-; SCALAR-NEXT: xorl %edi, %eax
-; SCALAR-NEXT: xorl %ecx, %eax
-; SCALAR-NEXT: shrl $15, %eax
-; SCALAR-NEXT: # kill: def $ax killed $ax killed $eax
+; SCALAR-NEXT: xorl %esi, %edi
+; SCALAR-NEXT: xorl %r13d, %r14d
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload
+; SCALAR-NEXT: xorl %r12d, %r14d
+; SCALAR-NEXT: xorl %r15d, %r14d
+; SCALAR-NEXT: xorl %r8d, %r14d
+; SCALAR-NEXT: xorl %edi, %r14d
+; SCALAR-NEXT: xorl %r9d, %ebp
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
+; SCALAR-NEXT: xorl %r14d, %ebp
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload
+; SCALAR-NEXT: xorl %ebp, %r11d
+; SCALAR-NEXT: shrl $15, %r11d
+; SCALAR-NEXT: movl %r11d, %eax
+; SCALAR-NEXT: popq %rbx
+; SCALAR-NEXT: popq %r12
+; SCALAR-NEXT: popq %r13
+; SCALAR-NEXT: popq %r14
+; SCALAR-NEXT: popq %r15
+; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmulr_i16:
@@ -830,134 +1117,349 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
define i32 @clmulr_i32(i32 %a, i32 %b) nounwind {
; SCALAR-LABEL: clmulr_i32:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %edi, %ecx
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $2, %edx
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $1, %esi
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: xorq %rdx, %rsi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $4, %edx
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $8, %edi
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rdx, %rdi
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: leaq (%rax,%rax), %rdx
+; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: movl %ecx, %esi
+; SCALAR-NEXT: andl $2, %esi
+; SCALAR-NEXT: cmovneq %rdx, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $1, %edi
+; SCALAR-NEXT: cmovneq %rax, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $16, %edx
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %r8d
+; SCALAR-NEXT: leaq (,%rax,4), %rdx
+; SCALAR-NEXT: movl %ecx, %esi
+; SCALAR-NEXT: andl $4, %esi
+; SCALAR-NEXT: cmovneq %rdx, %rsi
+; SCALAR-NEXT: leaq (,%rax,8), %r8
+; SCALAR-NEXT: movl %ecx, %edx
+; SCALAR-NEXT: andl $8, %edx
+; SCALAR-NEXT: cmovneq %r8, %rdx
+; SCALAR-NEXT: xorq %rsi, %rdx
+; SCALAR-NEXT: xorq %rdi, %rdx
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $4, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $16, %edi
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $5, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $32, %r8d
-; SCALAR-NEXT: imulq %rcx, %r8
-; SCALAR-NEXT: xorq %rdx, %r8
-; SCALAR-NEXT: movl %eax, %esi
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $6, %rdi
+; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $64, %esi
-; SCALAR-NEXT: imulq %rcx, %rsi
+; SCALAR-NEXT: cmovneq %rdi, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
-; SCALAR-NEXT: xorq %rdi, %rsi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $128, %edx
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $256, %edi # imm = 0x100
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rdx, %rdi
-; SCALAR-NEXT: movl %eax, %r8d
-; SCALAR-NEXT: andl $512, %r8d # imm = 0x200
-; SCALAR-NEXT: imulq %rcx, %r8
+; SCALAR-NEXT: xorq %rdx, %rsi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $7, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $128, %edi
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $8, %rdx
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $256, %r8d # imm = 0x100
+; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
-; SCALAR-NEXT: movl %eax, %edx
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $9, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $512, %edi # imm = 0x200
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $10, %r8
+; SCALAR-NEXT: movl %ecx, %edx
; SCALAR-NEXT: andl $1024, %edx # imm = 0x400
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: cmovneq %r8, %rdx
+; SCALAR-NEXT: xorq %rdi, %rdx
; SCALAR-NEXT: xorq %rsi, %rdx
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $2048, %esi # imm = 0x800
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $4096, %edi # imm = 0x1000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $8192, %esi # imm = 0x2000
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: xorq %rdi, %rsi
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $16384, %edi # imm = 0x4000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %esi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $11, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $2048, %edi # imm = 0x800
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $12, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $13, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $8192, %edi # imm = 0x2000
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $14, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $15, %rdi
+; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: xorq %rdi, %rsi
+; SCALAR-NEXT: cmovneq %rdi, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: xorq %rdx, %rsi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $65536, %edx # imm = 0x10000
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $131072, %edi # imm = 0x20000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rdx, %rdi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $262144, %edx # imm = 0x40000
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: xorq %rdi, %rdx
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $524288, %edi # imm = 0x80000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rdx, %rdi
-; SCALAR-NEXT: movl %eax, %r8d
-; SCALAR-NEXT: andl $1048576, %r8d # imm = 0x100000
-; SCALAR-NEXT: imulq %rcx, %r8
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $16, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $65536, %edi # imm = 0x10000
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $17, %rdx
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000
+; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
-; SCALAR-NEXT: movl %eax, %edx
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $18, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $262144, %edi # imm = 0x40000
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $19, %rdx
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000
+; SCALAR-NEXT: cmovneq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $20, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $1048576, %edi # imm = 0x100000
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $21, %r8
+; SCALAR-NEXT: movl %ecx, %edx
; SCALAR-NEXT: andl $2097152, %edx # imm = 0x200000
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: cmovneq %r8, %rdx
+; SCALAR-NEXT: xorq %rdi, %rdx
; SCALAR-NEXT: xorq %rsi, %rdx
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $4194304, %esi # imm = 0x400000
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $8388608, %edi # imm = 0x800000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $16777216, %esi # imm = 0x1000000
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: xorq %rdi, %rsi
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $33554432, %edi # imm = 0x2000000
-; SCALAR-NEXT: imulq %rcx, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $22, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $4194304, %edi # imm = 0x400000
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $23, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $24, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $16777216, %edi # imm = 0x1000000
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $25, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $26, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $67108864, %edi # imm = 0x4000000
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $27, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $28, %rdi
+; SCALAR-NEXT: movl %ecx, %esi
+; SCALAR-NEXT: andl $268435456, %esi # imm = 0x10000000
+; SCALAR-NEXT: cmovneq %rdi, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: xorq %rdx, %rsi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $29, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $536870912, %edi # imm = 0x20000000
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $30, %rdx
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $1073741824, %r8d # imm = 0x40000000
+; SCALAR-NEXT: cmovneq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $31, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $-2147483648, %edi # imm = 0x80000000
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $32, %r8
+; SCALAR-NEXT: xorl %edx, %edx
+; SCALAR-NEXT: btq $32, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $33, %rdi
+; SCALAR-NEXT: btq $33, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $34, %r8
+; SCALAR-NEXT: btq $34, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %r9
+; SCALAR-NEXT: shlq $35, %r9
+; SCALAR-NEXT: btq $35, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r9
+; SCALAR-NEXT: xorq %r8, %r9
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $36, %rdi
+; SCALAR-NEXT: btq $36, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r9, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $67108864, %esi # imm = 0x4000000
-; SCALAR-NEXT: imulq %rcx, %rsi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $37, %rsi
+; SCALAR-NEXT: btq $37, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $38, %r8
+; SCALAR-NEXT: btq $38, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $39, %rsi
+; SCALAR-NEXT: btq $39, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $40, %r8
+; SCALAR-NEXT: btq $40, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $41, %rsi
+; SCALAR-NEXT: btq $41, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $42, %r8
+; SCALAR-NEXT: btq $42, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $43, %rsi
+; SCALAR-NEXT: btq $43, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $44, %r8
+; SCALAR-NEXT: btq $44, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %r9
+; SCALAR-NEXT: shlq $45, %r9
+; SCALAR-NEXT: btq $45, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r9
+; SCALAR-NEXT: xorq %r8, %r9
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $46, %rsi
+; SCALAR-NEXT: btq $46, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r9, %rsi
; SCALAR-NEXT: xorq %rdi, %rsi
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $134217728, %edi # imm = 0x8000000
-; SCALAR-NEXT: imulq %rcx, %rdi
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $47, %rdi
+; SCALAR-NEXT: btq $47, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $48, %r8
+; SCALAR-NEXT: btq $48, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $49, %rdi
+; SCALAR-NEXT: btq $49, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $50, %r8
+; SCALAR-NEXT: btq $50, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $51, %rdi
+; SCALAR-NEXT: btq $51, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $52, %r8
+; SCALAR-NEXT: btq $52, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $53, %rdi
+; SCALAR-NEXT: btq $53, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $54, %r8
+; SCALAR-NEXT: btq $54, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $55, %rdi
+; SCALAR-NEXT: btq $55, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $56, %r8
+; SCALAR-NEXT: btq $56, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $57, %rdi
+; SCALAR-NEXT: btq $57, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $268435456, %esi # imm = 0x10000000
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: xorq %rdi, %rsi
-; SCALAR-NEXT: xorq %rdx, %rsi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $536870912, %edx # imm = 0x20000000
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $1073741824, %edi # imm = 0x40000000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rdx, %rdi
-; SCALAR-NEXT: andl $-2147483648, %eax # imm = 0x80000000
-; SCALAR-NEXT: imulq %rcx, %rax
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $58, %rsi
+; SCALAR-NEXT: btq $58, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $59, %r8
+; SCALAR-NEXT: btq $59, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $60, %rsi
+; SCALAR-NEXT: btq $60, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $61, %r8
+; SCALAR-NEXT: btq $61, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: shlq $62, %rax
+; SCALAR-NEXT: btq $62, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rax
+; SCALAR-NEXT: xorq %r8, %rax
; SCALAR-NEXT: xorq %rdi, %rax
-; SCALAR-NEXT: xorq %rsi, %rax
; SCALAR-NEXT: shrq $31, %rax
; SCALAR-NEXT: # kill: def $eax killed $eax killed $rax
; SCALAR-NEXT: retq
@@ -992,374 +1494,410 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind {
define i64 @clmulr_i64(i64 %a, i64 %b) nounwind {
; SCALAR-LABEL: clmulr_i64:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: pushq %rbp
-; SCALAR-NEXT: pushq %r15
; SCALAR-NEXT: pushq %r14
-; SCALAR-NEXT: pushq %r13
-; SCALAR-NEXT: pushq %r12
; SCALAR-NEXT: pushq %rbx
-; SCALAR-NEXT: subq $40, %rsp
; SCALAR-NEXT: bswapq %rdi
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shrq $4, %rax
-; SCALAR-NEXT: movabsq $1085102592571150095, %r8 # imm = 0xF0F0F0F0F0F0F0F
-; SCALAR-NEXT: andq %r8, %rax
-; SCALAR-NEXT: andq %r8, %rdi
+; SCALAR-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; SCALAR-NEXT: andq %rcx, %rax
+; SCALAR-NEXT: andq %rcx, %rdi
; SCALAR-NEXT: shlq $4, %rdi
; SCALAR-NEXT: orq %rax, %rdi
-; SCALAR-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
-; SCALAR-NEXT: movq %rdi, %rax
-; SCALAR-NEXT: andq %rdx, %rax
+; SCALAR-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: andq %rax, %rdx
; SCALAR-NEXT: shrq $2, %rdi
-; SCALAR-NEXT: andq %rdx, %rdi
-; SCALAR-NEXT: leaq (%rdi,%rax,4), %rax
-; SCALAR-NEXT: movabsq $6148914691236517205, %r9 # imm = 0x5555555555555555
-; SCALAR-NEXT: movq %rax, %rcx
-; SCALAR-NEXT: andq %r9, %rcx
-; SCALAR-NEXT: shrq %rax
-; SCALAR-NEXT: andq %r9, %rax
-; SCALAR-NEXT: leaq (%rax,%rcx,2), %rdi
+; SCALAR-NEXT: andq %rax, %rdi
+; SCALAR-NEXT: leaq (%rdi,%rdx,4), %rdi
+; SCALAR-NEXT: movabsq $6148914691236517205, %rdx # imm = 0x5555555555555555
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: andq %rdx, %r8
+; SCALAR-NEXT: shrq %rdi
+; SCALAR-NEXT: movq %rdi, %r9
+; SCALAR-NEXT: andq %rdx, %r9
+; SCALAR-NEXT: leaq (%r9,%r8,2), %r8
+; SCALAR-NEXT: leaq (%r8,%r8), %r9
; SCALAR-NEXT: bswapq %rsi
-; SCALAR-NEXT: movq %rsi, %rax
-; SCALAR-NEXT: shrq $4, %rax
-; SCALAR-NEXT: andq %r8, %rax
-; SCALAR-NEXT: andq %r8, %rsi
+; SCALAR-NEXT: movq %rsi, %r10
+; SCALAR-NEXT: shrq $4, %r10
+; SCALAR-NEXT: andq %rcx, %r10
+; SCALAR-NEXT: andq %rcx, %rsi
; SCALAR-NEXT: shlq $4, %rsi
-; SCALAR-NEXT: orq %rax, %rsi
-; SCALAR-NEXT: movq %rsi, %rax
-; SCALAR-NEXT: andq %rdx, %rax
+; SCALAR-NEXT: orq %r10, %rsi
+; SCALAR-NEXT: movq %rsi, %r10
+; SCALAR-NEXT: andq %rax, %r10
; SCALAR-NEXT: shrq $2, %rsi
+; SCALAR-NEXT: andq %rax, %rsi
+; SCALAR-NEXT: leaq (%rsi,%r10,4), %rsi
+; SCALAR-NEXT: movq %rsi, %r10
+; SCALAR-NEXT: andq %rdx, %r10
+; SCALAR-NEXT: shrq %rsi
; SCALAR-NEXT: andq %rdx, %rsi
-; SCALAR-NEXT: leaq (%rsi,%rax,4), %rax
-; SCALAR-NEXT: movq %rax, %rcx
-; SCALAR-NEXT: andq %r9, %rcx
-; SCALAR-NEXT: shrq %rax
-; SCALAR-NEXT: andq %r9, %rax
-; SCALAR-NEXT: leaq (%rax,%rcx,2), %rsi
+; SCALAR-NEXT: leaq (%rsi,%r10,2), %rsi
; SCALAR-NEXT: movl %esi, %r10d
-; SCALAR-NEXT: movl %esi, %r15d
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: movl %esi, %r14d
+; SCALAR-NEXT: andl $2, %r10d
+; SCALAR-NEXT: cmovneq %r9, %r10
; SCALAR-NEXT: movl %esi, %r11d
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $1, %r11d
+; SCALAR-NEXT: cmovneq %r8, %r11
+; SCALAR-NEXT: xorq %r10, %r11
+; SCALAR-NEXT: leaq (,%r8,4), %r9
+; SCALAR-NEXT: movl %esi, %r10d
+; SCALAR-NEXT: andl $4, %r10d
+; SCALAR-NEXT: cmovneq %r9, %r10
+; SCALAR-NEXT: leaq (,%r8,8), %rbx
; SCALAR-NEXT: movl %esi, %r9d
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $2, %edx
-; SCALAR-NEXT: imulq %rdi, %rdx
-; SCALAR-NEXT: andl $1, %r10d
-; SCALAR-NEXT: imulq %rdi, %r10
-; SCALAR-NEXT: xorq %rdx, %r10
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $4, %r15d
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: andl $8, %ecx
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: xorq %r15, %rcx
-; SCALAR-NEXT: movl %esi, %r13d
-; SCALAR-NEXT: xorq %r10, %rcx
+; SCALAR-NEXT: andl $8, %r9d
+; SCALAR-NEXT: cmovneq %rbx, %r9
+; SCALAR-NEXT: xorq %r10, %r9
+; SCALAR-NEXT: xorq %r11, %r9
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $4, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $16, %r11d
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $5, %r10
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $32, %ebx
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $6, %r11
; SCALAR-NEXT: movl %esi, %r10d
-; SCALAR-NEXT: andl $16, %r14d
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: andl $32, %r11d
-; SCALAR-NEXT: imulq %rdi, %r11
-; SCALAR-NEXT: xorq %r14, %r11
-; SCALAR-NEXT: movl %esi, %r14d
-; SCALAR-NEXT: andl $64, %eax
-; SCALAR-NEXT: imulq %rdi, %rax
-; SCALAR-NEXT: xorq %r11, %rax
+; SCALAR-NEXT: andl $64, %r10d
+; SCALAR-NEXT: cmovneq %r11, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
+; SCALAR-NEXT: xorq %r9, %r10
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $7, %r9
; SCALAR-NEXT: movl %esi, %r11d
-; SCALAR-NEXT: xorq %rcx, %rax
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $128, %ebx
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: andl $256, %r9d # imm = 0x100
-; SCALAR-NEXT: imulq %rdi, %r9
-; SCALAR-NEXT: xorq %rbx, %r9
-; SCALAR-NEXT: movl %esi, %ebp
-; SCALAR-NEXT: andl $512, %r8d # imm = 0x200
-; SCALAR-NEXT: imulq %rdi, %r8
-; SCALAR-NEXT: xorq %r9, %r8
-; SCALAR-NEXT: movl %esi, %r15d
-; SCALAR-NEXT: andl $1024, %edx # imm = 0x400
-; SCALAR-NEXT: imulq %rdi, %rdx
-; SCALAR-NEXT: xorq %r8, %rdx
-; SCALAR-NEXT: movl %esi, %r12d
-; SCALAR-NEXT: xorq %rax, %rdx
+; SCALAR-NEXT: andl $128, %r11d
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $8, %r9
; SCALAR-NEXT: movl %esi, %ebx
-; SCALAR-NEXT: andl $2048, %r13d # imm = 0x800
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: andl $4096, %r10d # imm = 0x1000
-; SCALAR-NEXT: imulq %rdi, %r10
-; SCALAR-NEXT: xorq %r13, %r10
-; SCALAR-NEXT: movl %esi, %r13d
-; SCALAR-NEXT: andl $8192, %r14d # imm = 0x2000
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: xorq %r10, %r14
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $16384, %r11d # imm = 0x4000
-; SCALAR-NEXT: imulq %rdi, %r11
-; SCALAR-NEXT: xorq %r14, %r11
-; SCALAR-NEXT: movl %esi, %r14d
-; SCALAR-NEXT: andl $32768, %ecx # imm = 0x8000
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: xorq %r11, %rcx
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: xorq %rdx, %rcx
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $65536, %ebp # imm = 0x10000
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: andl $131072, %r15d # imm = 0x20000
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: xorq %rbp, %r15
+; SCALAR-NEXT: andl $256, %ebx # imm = 0x100
+; SCALAR-NEXT: cmovneq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $9, %r9
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $512, %r11d # imm = 0x200
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $10, %rbx
; SCALAR-NEXT: movl %esi, %r9d
-; SCALAR-NEXT: andl $262144, %r12d # imm = 0x40000
-; SCALAR-NEXT: imulq %rdi, %r12
-; SCALAR-NEXT: xorq %r15, %r12
+; SCALAR-NEXT: andl $1024, %r9d # imm = 0x400
+; SCALAR-NEXT: cmovneq %rbx, %r9
+; SCALAR-NEXT: xorq %r11, %r9
+; SCALAR-NEXT: xorq %r10, %r9
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $11, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $2048, %r11d # imm = 0x800
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $12, %r10
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $4096, %ebx # imm = 0x1000
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $13, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $8192, %r11d # imm = 0x2000
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $14, %r10
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $16384, %ebx # imm = 0x4000
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $15, %r11
; SCALAR-NEXT: movl %esi, %r10d
+; SCALAR-NEXT: andl $32768, %r10d # imm = 0x8000
+; SCALAR-NEXT: cmovneq %r11, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
+; SCALAR-NEXT: xorq %r9, %r10
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $16, %r9
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $65536, %r11d # imm = 0x10000
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $17, %r9
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $131072, %ebx # imm = 0x20000
+; SCALAR-NEXT: cmovneq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $18, %r9
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $262144, %r11d # imm = 0x40000
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $19, %r9
+; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $524288, %ebx # imm = 0x80000
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: xorq %r12, %rbx
+; SCALAR-NEXT: cmovneq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $20, %r9
; SCALAR-NEXT: movl %esi, %r11d
-; SCALAR-NEXT: andl $1048576, %r13d # imm = 0x100000
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbx, %r13
-; SCALAR-NEXT: movl %esi, %r15d
-; SCALAR-NEXT: andl $2097152, %eax # imm = 0x200000
-; SCALAR-NEXT: imulq %rdi, %rax
-; SCALAR-NEXT: xorq %r13, %rax
+; SCALAR-NEXT: andl $1048576, %r11d # imm = 0x100000
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $21, %rbx
+; SCALAR-NEXT: movl %esi, %r9d
+; SCALAR-NEXT: andl $2097152, %r9d # imm = 0x200000
+; SCALAR-NEXT: cmovneq %rbx, %r9
+; SCALAR-NEXT: xorq %r11, %r9
+; SCALAR-NEXT: xorq %r10, %r9
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $22, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $4194304, %r11d # imm = 0x400000
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $23, %r10
; SCALAR-NEXT: movl %esi, %ebx
-; SCALAR-NEXT: xorq %rcx, %rax
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $4194304, %r14d # imm = 0x400000
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000
-; SCALAR-NEXT: imulq %rdi, %r8
-; SCALAR-NEXT: xorq %r14, %r8
-; SCALAR-NEXT: movabsq $4294967296, %rbp # imm = 0x100000000
-; SCALAR-NEXT: andq %rsi, %rbp
-; SCALAR-NEXT: andl $16777216, %edx # imm = 0x1000000
-; SCALAR-NEXT: imulq %rdi, %rdx
-; SCALAR-NEXT: xorq %r8, %rdx
-; SCALAR-NEXT: movabsq $8589934592, %r8 # imm = 0x200000000
-; SCALAR-NEXT: andq %rsi, %r8
-; SCALAR-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $33554432, %r9d # imm = 0x2000000
-; SCALAR-NEXT: imulq %rdi, %r9
-; SCALAR-NEXT: xorq %rdx, %r9
-; SCALAR-NEXT: movabsq $17179869184, %rdx # imm = 0x400000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $67108864, %r10d # imm = 0x4000000
-; SCALAR-NEXT: imulq %rdi, %r10
+; SCALAR-NEXT: andl $8388608, %ebx # imm = 0x800000
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $24, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $16777216, %r11d # imm = 0x1000000
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $25, %r10
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $33554432, %ebx # imm = 0x2000000
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $26, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $67108864, %r11d # imm = 0x4000000
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $27, %r10
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $134217728, %ebx # imm = 0x8000000
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $28, %r11
+; SCALAR-NEXT: movl %esi, %r10d
+; SCALAR-NEXT: andl $268435456, %r10d # imm = 0x10000000
+; SCALAR-NEXT: cmovneq %r11, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: xorq %r9, %r10
-; SCALAR-NEXT: movabsq $34359738368, %rdx # imm = 0x800000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $134217728, %r11d # imm = 0x8000000
-; SCALAR-NEXT: imulq %rdi, %r11
-; SCALAR-NEXT: xorq %r10, %r11
-; SCALAR-NEXT: movabsq $68719476736, %rdx # imm = 0x1000000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $268435456, %r15d # imm = 0x10000000
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: xorq %r11, %r15
-; SCALAR-NEXT: movabsq $137438953472, %rdx # imm = 0x2000000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movq %rdx, (%rsp) # 8-byte Spill
-; SCALAR-NEXT: xorq %rax, %r15
-; SCALAR-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $274877906944, %r13 # imm = 0x4000000000
-; SCALAR-NEXT: andq %rsi, %r13
-; SCALAR-NEXT: andl $536870912, %ebx # imm = 0x20000000
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: andl $1073741824, %ecx # imm = 0x40000000
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: xorq %rbx, %rcx
-; SCALAR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $549755813888, %rax # imm = 0x8000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $1099511627776, %rax # imm = 0x10000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $2199023255552, %rax # imm = 0x20000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $4398046511104, %rax # imm = 0x40000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $8796093022208, %rax # imm = 0x80000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $17592186044416, %rax # imm = 0x100000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $35184372088832, %rax # imm = 0x200000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $70368744177664, %rax # imm = 0x400000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $140737488355328, %rax # imm = 0x800000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $281474976710656, %rax # imm = 0x1000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $562949953421312, %rax # imm = 0x2000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $1125899906842624, %rax # imm = 0x4000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $2251799813685248, %rax # imm = 0x8000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $4503599627370496, %r12 # imm = 0x10000000000000
-; SCALAR-NEXT: andq %rsi, %r12
-; SCALAR-NEXT: movabsq $9007199254740992, %r15 # imm = 0x20000000000000
-; SCALAR-NEXT: andq %rsi, %r15
-; SCALAR-NEXT: movabsq $18014398509481984, %r14 # imm = 0x40000000000000
-; SCALAR-NEXT: andq %rsi, %r14
-; SCALAR-NEXT: movabsq $36028797018963968, %r11 # imm = 0x80000000000000
-; SCALAR-NEXT: andq %rsi, %r11
-; SCALAR-NEXT: movabsq $72057594037927936, %rax # imm = 0x100000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $144115188075855872, %rbx # imm = 0x200000000000000
-; SCALAR-NEXT: andq %rsi, %rbx
-; SCALAR-NEXT: movabsq $288230376151711744, %r10 # imm = 0x400000000000000
-; SCALAR-NEXT: andq %rsi, %r10
-; SCALAR-NEXT: movabsq $576460752303423488, %r9 # imm = 0x800000000000000
-; SCALAR-NEXT: andq %rsi, %r9
-; SCALAR-NEXT: movabsq $1152921504606846976, %r8 # imm = 0x1000000000000000
-; SCALAR-NEXT: andq %rsi, %r8
-; SCALAR-NEXT: movabsq $2305843009213693952, %rdx # imm = 0x2000000000000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movabsq $4611686018427387904, %rcx # imm = 0x4000000000000000
-; SCALAR-NEXT: andq %rsi, %rcx
-; SCALAR-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: # kill: def $esi killed $esi killed $rsi def $rsi
-; SCALAR-NEXT: andl $-2147483648, %esi # imm = 0x80000000
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; SCALAR-NEXT: movq (%rsp), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: xorq %rsi, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: imulq %rdi, %r12
-; SCALAR-NEXT: xorq %rbp, %r12
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: xorq %r12, %r15
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: xorq %r15, %r14
-; SCALAR-NEXT: imulq %rdi, %r11
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $29, %r9
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $536870912, %r11d # imm = 0x20000000
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $30, %r9
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $1073741824, %ebx # imm = 0x40000000
+; SCALAR-NEXT: cmovneq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $31, %r9
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $-2147483648, %r11d # imm = 0x80000000
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $32, %rbx
+; SCALAR-NEXT: xorl %r9d, %r9d
+; SCALAR-NEXT: btq $32, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $33, %r11
+; SCALAR-NEXT: btq $33, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $34, %rbx
+; SCALAR-NEXT: btq $34, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r14
+; SCALAR-NEXT: shlq $35, %r14
+; SCALAR-NEXT: btq $35, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r14
+; SCALAR-NEXT: xorq %rbx, %r14
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $36, %r11
+; SCALAR-NEXT: btq $36, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %r14, %r11
-; SCALAR-NEXT: xorq %r13, %r11
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: xorq %rsi, %rbx
-; SCALAR-NEXT: imulq %rdi, %r10
+; SCALAR-NEXT: xorq %r10, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $37, %r10
+; SCALAR-NEXT: btq $37, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $38, %rbx
+; SCALAR-NEXT: btq $38, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $39, %r10
+; SCALAR-NEXT: btq $39, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %rbx, %r10
-; SCALAR-NEXT: imulq %rdi, %r9
-; SCALAR-NEXT: xorq %r10, %r9
-; SCALAR-NEXT: imulq %rdi, %r8
-; SCALAR-NEXT: xorq %r9, %r8
-; SCALAR-NEXT: imulq %rdi, %rdx
-; SCALAR-NEXT: xorq %r8, %rdx
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: xorq %rdx, %rcx
-; SCALAR-NEXT: imulq %rdi, %rax
-; SCALAR-NEXT: xorq %rcx, %rax
-; SCALAR-NEXT: xorq %r11, %rax
-; SCALAR-NEXT: bswapq %rax
-; SCALAR-NEXT: movq %rax, %rcx
-; SCALAR-NEXT: shrq $4, %rcx
-; SCALAR-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
-; SCALAR-NEXT: andq %rdx, %rcx
-; SCALAR-NEXT: andq %rdx, %rax
-; SCALAR-NEXT: shlq $4, %rax
-; SCALAR-NEXT: orq %rcx, %rax
-; SCALAR-NEXT: movq %rax, %rcx
-; SCALAR-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
-; SCALAR-NEXT: andq %rdx, %rcx
-; SCALAR-NEXT: shrq $2, %rax
-; SCALAR-NEXT: andq %rdx, %rax
-; SCALAR-NEXT: leaq (%rax,%rcx,4), %rax
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $40, %rbx
+; SCALAR-NEXT: btq $40, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $41, %r10
+; SCALAR-NEXT: btq $41, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $42, %rbx
+; SCALAR-NEXT: btq $42, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $43, %r10
+; SCALAR-NEXT: btq $43, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $44, %rbx
+; SCALAR-NEXT: btq $44, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: movq %r8, %r14
+; SCALAR-NEXT: shlq $45, %r14
+; SCALAR-NEXT: btq $45, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r14
+; SCALAR-NEXT: xorq %rbx, %r14
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $46, %r10
+; SCALAR-NEXT: btq $46, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: xorq %r14, %r10
+; SCALAR-NEXT: xorq %r11, %r10
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $47, %r11
+; SCALAR-NEXT: btq $47, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $48, %rbx
+; SCALAR-NEXT: btq $48, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $49, %r11
+; SCALAR-NEXT: btq $49, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $50, %rbx
+; SCALAR-NEXT: btq $50, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $51, %r11
+; SCALAR-NEXT: btq $51, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $52, %rbx
+; SCALAR-NEXT: btq $52, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $53, %r11
+; SCALAR-NEXT: btq $53, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $54, %rbx
+; SCALAR-NEXT: btq $54, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $55, %r11
+; SCALAR-NEXT: btq $55, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $56, %rbx
+; SCALAR-NEXT: btq $56, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $57, %r11
+; SCALAR-NEXT: btq $57, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: xorq %r10, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $58, %r10
+; SCALAR-NEXT: btq $58, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $59, %rbx
+; SCALAR-NEXT: btq $59, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $60, %r10
+; SCALAR-NEXT: btq $60, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $61, %rbx
+; SCALAR-NEXT: btq $61, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: shlq $62, %r8
+; SCALAR-NEXT: btq $62, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r8
+; SCALAR-NEXT: xorq %rbx, %r8
+; SCALAR-NEXT: shlq $63, %rdi
+; SCALAR-NEXT: btq $63, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: xorq %r11, %rdi
+; SCALAR-NEXT: bswapq %rdi
+; SCALAR-NEXT: movq %rdi, %rsi
+; SCALAR-NEXT: shrq $4, %rsi
+; SCALAR-NEXT: andq %rcx, %rsi
+; SCALAR-NEXT: andq %rcx, %rdi
+; SCALAR-NEXT: shlq $4, %rdi
+; SCALAR-NEXT: orq %rsi, %rdi
+; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: andq %rax, %rcx
+; SCALAR-NEXT: shrq $2, %rdi
+; SCALAR-NEXT: andq %rax, %rdi
+; SCALAR-NEXT: leaq (%rdi,%rcx,4), %rax
; SCALAR-NEXT: movq %rax, %rcx
-; SCALAR-NEXT: movabsq $6148914691236517205, %rdx # imm = 0x5555555555555555
; SCALAR-NEXT: andq %rdx, %rcx
; SCALAR-NEXT: shrq %rax
; SCALAR-NEXT: andq %rdx, %rax
; SCALAR-NEXT: leaq (%rax,%rcx,2), %rax
-; SCALAR-NEXT: addq $40, %rsp
; SCALAR-NEXT: popq %rbx
-; SCALAR-NEXT: popq %r12
-; SCALAR-NEXT: popq %r13
; SCALAR-NEXT: popq %r14
-; SCALAR-NEXT: popq %r15
-; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE2-PCLMUL-LABEL: clmulr_i64:
@@ -1403,39 +1941,93 @@ define i64 @clmulr_i64(i64 %a, i64 %b) nounwind {
define i8 @clmulh_i8(i8 %a, i8 %b) nounwind {
; SCALAR-LABEL: clmulh_i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $2, %ecx
-; SCALAR-NEXT: movzbl %dil, %eax
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $1, %edx
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $4, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
+; SCALAR-NEXT: pushq %rbp
+; SCALAR-NEXT: pushq %r15
+; SCALAR-NEXT: pushq %r14
+; SCALAR-NEXT: pushq %r12
+; SCALAR-NEXT: pushq %rbx
+; SCALAR-NEXT: movzbl %dil, %ecx
+; SCALAR-NEXT: movl %ecx, %ebx
+; SCALAR-NEXT: shll $8, %ebx
+; SCALAR-NEXT: movl %ecx, %r11d
+; SCALAR-NEXT: shll $9, %r11d
+; SCALAR-NEXT: movl %ecx, %r10d
+; SCALAR-NEXT: shll $10, %r10d
+; SCALAR-NEXT: movl %ecx, %eax
+; SCALAR-NEXT: shll $11, %eax
+; SCALAR-NEXT: movl %ecx, %r9d
+; SCALAR-NEXT: shll $12, %r9d
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: shll $13, %r8d
+; SCALAR-NEXT: movl %edi, %edx
+; SCALAR-NEXT: shll $14, %edx
+; SCALAR-NEXT: xorl %ebp, %ebp
+; SCALAR-NEXT: testw %bp, %bp
+; SCALAR-NEXT: cmovel %ebp, %edx
+; SCALAR-NEXT: cmovel %ebp, %r8d
+; SCALAR-NEXT: cmovel %ebp, %r9d
+; SCALAR-NEXT: cmovel %ebp, %eax
+; SCALAR-NEXT: cmovel %ebp, %r10d
+; SCALAR-NEXT: cmovel %ebp, %r11d
+; SCALAR-NEXT: cmovel %ebp, %ebx
+; SCALAR-NEXT: shll $15, %edi
+; SCALAR-NEXT: testw %bp, %bp
+; SCALAR-NEXT: cmovnel %edi, %ebp
+; SCALAR-NEXT: movl %esi, %edi
+; SCALAR-NEXT: andl $1, %edi
+; SCALAR-NEXT: cmovnel %ecx, %edi
+; SCALAR-NEXT: leal (%rcx,%rcx), %r14d
+; SCALAR-NEXT: movl %esi, %r15d
+; SCALAR-NEXT: andl $2, %r15d
+; SCALAR-NEXT: cmovnel %r14d, %r15d
+; SCALAR-NEXT: xorl %edi, %r15d
+; SCALAR-NEXT: leal (,%rcx,4), %edi
+; SCALAR-NEXT: movl %esi, %r14d
+; SCALAR-NEXT: andl $4, %r14d
+; SCALAR-NEXT: cmovnel %edi, %r14d
+; SCALAR-NEXT: leal (,%rcx,8), %r12d
; SCALAR-NEXT: movl %esi, %edi
; SCALAR-NEXT: andl $8, %edi
-; SCALAR-NEXT: imull %eax, %edi
-; SCALAR-NEXT: xorl %ecx, %edi
-; SCALAR-NEXT: xorl %edx, %edi
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $16, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $32, %edx
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $64, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: xorl %edx, %ecx
-; SCALAR-NEXT: xorl %edi, %ecx
+; SCALAR-NEXT: cmovnel %r12d, %edi
+; SCALAR-NEXT: xorl %r14d, %edi
+; SCALAR-NEXT: xorl %r15d, %edi
+; SCALAR-NEXT: movl %ecx, %r14d
+; SCALAR-NEXT: shll $4, %r14d
+; SCALAR-NEXT: movl %esi, %r15d
+; SCALAR-NEXT: andl $16, %r15d
+; SCALAR-NEXT: cmovnel %r14d, %r15d
+; SCALAR-NEXT: movl %ecx, %r14d
+; SCALAR-NEXT: shll $5, %r14d
+; SCALAR-NEXT: movl %esi, %r12d
+; SCALAR-NEXT: andl $32, %r12d
+; SCALAR-NEXT: cmovnel %r14d, %r12d
+; SCALAR-NEXT: xorl %r15d, %r12d
+; SCALAR-NEXT: movl %ecx, %r14d
+; SCALAR-NEXT: shll $6, %r14d
+; SCALAR-NEXT: movl %esi, %r15d
+; SCALAR-NEXT: andl $64, %r15d
+; SCALAR-NEXT: cmovnel %r14d, %r15d
+; SCALAR-NEXT: xorl %r12d, %r15d
+; SCALAR-NEXT: xorl %edi, %r15d
+; SCALAR-NEXT: shll $7, %ecx
; SCALAR-NEXT: andl $128, %esi
-; SCALAR-NEXT: imull %esi, %eax
+; SCALAR-NEXT: cmovel %esi, %ecx
+; SCALAR-NEXT: xorl %ebx, %ecx
+; SCALAR-NEXT: xorl %r11d, %ecx
+; SCALAR-NEXT: xorl %r10d, %ecx
+; SCALAR-NEXT: xorl %r15d, %ecx
+; SCALAR-NEXT: xorl %r9d, %eax
+; SCALAR-NEXT: xorl %r8d, %eax
+; SCALAR-NEXT: xorl %edx, %eax
+; SCALAR-NEXT: xorl %ebp, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: shrl $8, %eax
; SCALAR-NEXT: # kill: def $al killed $al killed $eax
+; SCALAR-NEXT: popq %rbx
+; SCALAR-NEXT: popq %r12
+; SCALAR-NEXT: popq %r14
+; SCALAR-NEXT: popq %r15
+; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmulh_i8:
@@ -1472,71 +2064,199 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind {
define i16 @clmulh_i16(i16 %a, i16 %b) nounwind {
; SCALAR-LABEL: clmulh_i16:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: pushq %rbp
+; SCALAR-NEXT: pushq %r15
+; SCALAR-NEXT: pushq %r14
+; SCALAR-NEXT: pushq %r13
+; SCALAR-NEXT: pushq %r12
+; SCALAR-NEXT: pushq %rbx
+; SCALAR-NEXT: movl %esi, %r9d
+; SCALAR-NEXT: movl %edi, %r14d
+; SCALAR-NEXT: movl %edi, %r13d
+; SCALAR-NEXT: movl %edi, %r12d
+; SCALAR-NEXT: movl %edi, %r15d
+; SCALAR-NEXT: movl %edi, %ebp
+; SCALAR-NEXT: movl %edi, %r10d
+; SCALAR-NEXT: movl %edi, %ebx
+; SCALAR-NEXT: movl %edi, %r11d
+; SCALAR-NEXT: movl %edi, %esi
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: movl %edi, %edx
+; SCALAR-NEXT: movl %edi, %r8d
+; SCALAR-NEXT: shll $16, %r8d
+; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: shll $17, %r14d
+; SCALAR-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: shll $18, %r13d
+; SCALAR-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: shll $19, %r12d
+; SCALAR-NEXT: movl %r12d, %r13d
+; SCALAR-NEXT: shll $20, %r15d
+; SCALAR-NEXT: movl %r15d, %r12d
+; SCALAR-NEXT: shll $21, %ebp
+; SCALAR-NEXT: movl %ebp, %r15d
+; SCALAR-NEXT: shll $22, %r10d
+; SCALAR-NEXT: shll $23, %ebx
+; SCALAR-NEXT: movl %ebx, %r14d
+; SCALAR-NEXT: shll $24, %r11d
+; SCALAR-NEXT: movl %r11d, %ebp
+; SCALAR-NEXT: shll $25, %esi
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: shll $26, %ecx
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: shll $27, %eax
+; SCALAR-NEXT: movl %eax, %esi
+; SCALAR-NEXT: shll $28, %edx
+; SCALAR-NEXT: movl %edi, %r11d
+; SCALAR-NEXT: shll $29, %r11d
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $30, %ecx
+; SCALAR-NEXT: xorl %eax, %eax
+; SCALAR-NEXT: testl $0, %eax
+; SCALAR-NEXT: cmovel %eax, %ecx
+; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %r11d
+; SCALAR-NEXT: cmovel %eax, %edx
+; SCALAR-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %esi
+; SCALAR-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %r8d
+; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %ebx
+; SCALAR-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %ebp
+; SCALAR-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %r14d
+; SCALAR-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %r10d
+; SCALAR-NEXT: cmovel %eax, %r15d
+; SCALAR-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SCALAR-NEXT: cmovel %eax, %r12d
+; SCALAR-NEXT: cmovel %eax, %r13d
+; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload
+; SCALAR-NEXT: cmovel %eax, %r15d
+; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Reload
+; SCALAR-NEXT: cmovel %eax, %r14d
+; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload
+; SCALAR-NEXT: cmovel %eax, %r8d
+; SCALAR-NEXT: movl %edi, %ebx
+; SCALAR-NEXT: shll $31, %ebx
+; SCALAR-NEXT: testl $0, %eax
+; SCALAR-NEXT: cmovel %eax, %ebx
+; SCALAR-NEXT: movzwl %di, %edi
+; SCALAR-NEXT: movl %r9d, %eax
+; SCALAR-NEXT: andl $1, %eax
+; SCALAR-NEXT: cmovnel %edi, %eax
+; SCALAR-NEXT: movl %r9d, %ecx
; SCALAR-NEXT: andl $2, %ecx
-; SCALAR-NEXT: movzwl %di, %eax
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $1, %edx
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: xorl %ecx, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $4, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %edi
-; SCALAR-NEXT: andl $8, %edi
-; SCALAR-NEXT: imull %eax, %edi
-; SCALAR-NEXT: xorl %ecx, %edi
-; SCALAR-NEXT: xorl %edx, %edi
-; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: leal (%rdi,%rdi), %esi
+; SCALAR-NEXT: cmovnel %esi, %ecx
+; SCALAR-NEXT: xorl %eax, %ecx
+; SCALAR-NEXT: movl %r9d, %eax
+; SCALAR-NEXT: andl $4, %eax
+; SCALAR-NEXT: leal (,%rdi,4), %esi
+; SCALAR-NEXT: cmovnel %esi, %eax
+; SCALAR-NEXT: movl %r9d, %esi
+; SCALAR-NEXT: andl $8, %esi
+; SCALAR-NEXT: leal (,%rdi,8), %ebp
+; SCALAR-NEXT: cmovnel %ebp, %esi
+; SCALAR-NEXT: xorl %eax, %esi
+; SCALAR-NEXT: xorl %ecx, %esi
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $4, %eax
+; SCALAR-NEXT: movl %r9d, %ecx
; SCALAR-NEXT: andl $16, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andl $32, %r8d
-; SCALAR-NEXT: imull %eax, %r8d
-; SCALAR-NEXT: xorl %ecx, %r8d
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $64, %edx
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: xorl %r8d, %edx
-; SCALAR-NEXT: xorl %edi, %edx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $128, %ecx
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: movl %esi, %edi
-; SCALAR-NEXT: andl $256, %edi # imm = 0x100
-; SCALAR-NEXT: imull %eax, %edi
-; SCALAR-NEXT: xorl %ecx, %edi
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andl $512, %r8d # imm = 0x200
-; SCALAR-NEXT: imull %eax, %r8d
-; SCALAR-NEXT: xorl %edi, %r8d
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $1024, %ecx # imm = 0x400
-; SCALAR-NEXT: imull %eax, %ecx
-; SCALAR-NEXT: xorl %r8d, %ecx
+; SCALAR-NEXT: cmovnel %eax, %ecx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $5, %eax
+; SCALAR-NEXT: movl %r9d, %ebp
+; SCALAR-NEXT: andl $32, %ebp
+; SCALAR-NEXT: cmovnel %eax, %ebp
+; SCALAR-NEXT: xorl %ecx, %ebp
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $6, %ecx
+; SCALAR-NEXT: movl %r9d, %eax
+; SCALAR-NEXT: andl $64, %eax
+; SCALAR-NEXT: cmovnel %ecx, %eax
+; SCALAR-NEXT: xorl %ebp, %eax
+; SCALAR-NEXT: xorl %esi, %eax
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $7, %ecx
+; SCALAR-NEXT: movl %r9d, %esi
+; SCALAR-NEXT: andl $128, %esi
+; SCALAR-NEXT: cmovnel %ecx, %esi
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $8, %ecx
+; SCALAR-NEXT: movl %r9d, %ebp
+; SCALAR-NEXT: andl $256, %ebp # imm = 0x100
+; SCALAR-NEXT: cmovnel %ecx, %ebp
+; SCALAR-NEXT: xorl %esi, %ebp
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $9, %ecx
+; SCALAR-NEXT: movl %r9d, %edx
+; SCALAR-NEXT: andl $512, %edx # imm = 0x200
+; SCALAR-NEXT: cmovnel %ecx, %edx
+; SCALAR-NEXT: xorl %ebp, %edx
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shll $10, %ecx
+; SCALAR-NEXT: movl %r9d, %esi
+; SCALAR-NEXT: andl $1024, %esi # imm = 0x400
+; SCALAR-NEXT: cmovnel %ecx, %esi
+; SCALAR-NEXT: xorl %edx, %esi
+; SCALAR-NEXT: xorl %eax, %esi
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $11, %eax
+; SCALAR-NEXT: movl %r9d, %ecx
+; SCALAR-NEXT: andl $2048, %ecx # imm = 0x800
+; SCALAR-NEXT: cmovnel %eax, %ecx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $12, %eax
+; SCALAR-NEXT: movl %r9d, %edx
+; SCALAR-NEXT: andl $4096, %edx # imm = 0x1000
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: xorl %ecx, %edx
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $13, %eax
+; SCALAR-NEXT: movl %r9d, %ecx
+; SCALAR-NEXT: andl $8192, %ecx # imm = 0x2000
+; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: xorl %edx, %ecx
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $2048, %edx # imm = 0x800
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: movl %esi, %edi
-; SCALAR-NEXT: andl $4096, %edi # imm = 0x1000
-; SCALAR-NEXT: imull %eax, %edi
-; SCALAR-NEXT: xorl %edx, %edi
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000
-; SCALAR-NEXT: imull %eax, %edx
-; SCALAR-NEXT: xorl %edi, %edx
-; SCALAR-NEXT: movl %esi, %edi
-; SCALAR-NEXT: andl $16384, %edi # imm = 0x4000
-; SCALAR-NEXT: imull %eax, %edi
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: shll $14, %eax
+; SCALAR-NEXT: movl %r9d, %edx
+; SCALAR-NEXT: andl $16384, %edx # imm = 0x4000
+; SCALAR-NEXT: cmovnel %eax, %edx
+; SCALAR-NEXT: xorl %ecx, %edx
+; SCALAR-NEXT: shll $15, %edi
+; SCALAR-NEXT: andl $32768, %r9d # imm = 0x8000
+; SCALAR-NEXT: cmovel %r9d, %edi
; SCALAR-NEXT: xorl %edx, %edi
-; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000
-; SCALAR-NEXT: imull %esi, %eax
-; SCALAR-NEXT: xorl %edi, %eax
-; SCALAR-NEXT: xorl %ecx, %eax
-; SCALAR-NEXT: shrl $16, %eax
-; SCALAR-NEXT: # kill: def $ax killed $ax killed $eax
+; SCALAR-NEXT: xorl %esi, %edi
+; SCALAR-NEXT: xorl %r14d, %r8d
+; SCALAR-NEXT: xorl %r15d, %r8d
+; SCALAR-NEXT: xorl %r13d, %r8d
+; SCALAR-NEXT: xorl %r12d, %r8d
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
+; SCALAR-NEXT: xorl %edi, %r8d
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
+; SCALAR-NEXT: xorl %r8d, %r10d
+; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload
+; SCALAR-NEXT: xorl %ebx, %r11d
+; SCALAR-NEXT: xorl %r10d, %r11d
+; SCALAR-NEXT: shrl $16, %r11d
+; SCALAR-NEXT: movl %r11d, %eax
+; SCALAR-NEXT: popq %rbx
+; SCALAR-NEXT: popq %r12
+; SCALAR-NEXT: popq %r13
+; SCALAR-NEXT: popq %r14
+; SCALAR-NEXT: popq %r15
+; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmulh_i16:
@@ -1573,134 +2293,354 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind {
define i32 @clmulh_i32(i32 %a, i32 %b) nounwind {
; SCALAR-LABEL: clmulh_i32:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %edi, %ecx
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $2, %edx
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $1, %esi
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: xorq %rdx, %rsi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $4, %edx
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $8, %edi
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rdx, %rdi
+; SCALAR-NEXT: movl %edi, %eax
+; SCALAR-NEXT: leaq (%rax,%rax), %rdx
+; SCALAR-NEXT: movl %esi, %ecx
+; SCALAR-NEXT: movl %ecx, %esi
+; SCALAR-NEXT: andl $2, %esi
+; SCALAR-NEXT: cmovneq %rdx, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $1, %edi
+; SCALAR-NEXT: cmovneq %rax, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $16, %edx
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %r8d
+; SCALAR-NEXT: leaq (,%rax,4), %rdx
+; SCALAR-NEXT: movl %ecx, %esi
+; SCALAR-NEXT: andl $4, %esi
+; SCALAR-NEXT: cmovneq %rdx, %rsi
+; SCALAR-NEXT: leaq (,%rax,8), %r8
+; SCALAR-NEXT: movl %ecx, %edx
+; SCALAR-NEXT: andl $8, %edx
+; SCALAR-NEXT: cmovneq %r8, %rdx
+; SCALAR-NEXT: xorq %rsi, %rdx
+; SCALAR-NEXT: xorq %rdi, %rdx
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $4, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $16, %edi
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $5, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $32, %r8d
-; SCALAR-NEXT: imulq %rcx, %r8
-; SCALAR-NEXT: xorq %rdx, %r8
-; SCALAR-NEXT: movl %eax, %esi
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $6, %rdi
+; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $64, %esi
-; SCALAR-NEXT: imulq %rcx, %rsi
+; SCALAR-NEXT: cmovneq %rdi, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
-; SCALAR-NEXT: xorq %rdi, %rsi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $128, %edx
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $256, %edi # imm = 0x100
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rdx, %rdi
-; SCALAR-NEXT: movl %eax, %r8d
-; SCALAR-NEXT: andl $512, %r8d # imm = 0x200
-; SCALAR-NEXT: imulq %rcx, %r8
+; SCALAR-NEXT: xorq %rdx, %rsi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $7, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $128, %edi
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $8, %rdx
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $256, %r8d # imm = 0x100
+; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
-; SCALAR-NEXT: movl %eax, %edx
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $9, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $512, %edi # imm = 0x200
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $10, %r8
+; SCALAR-NEXT: movl %ecx, %edx
; SCALAR-NEXT: andl $1024, %edx # imm = 0x400
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: cmovneq %r8, %rdx
+; SCALAR-NEXT: xorq %rdi, %rdx
; SCALAR-NEXT: xorq %rsi, %rdx
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $2048, %esi # imm = 0x800
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $4096, %edi # imm = 0x1000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $8192, %esi # imm = 0x2000
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: xorq %rdi, %rsi
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $16384, %edi # imm = 0x4000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %esi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $11, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $2048, %edi # imm = 0x800
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $12, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $13, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $8192, %edi # imm = 0x2000
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $14, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $15, %rdi
+; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: xorq %rdi, %rsi
+; SCALAR-NEXT: cmovneq %rdi, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: xorq %rdx, %rsi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $65536, %edx # imm = 0x10000
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $131072, %edi # imm = 0x20000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rdx, %rdi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $262144, %edx # imm = 0x40000
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: xorq %rdi, %rdx
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $524288, %edi # imm = 0x80000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rdx, %rdi
-; SCALAR-NEXT: movl %eax, %r8d
-; SCALAR-NEXT: andl $1048576, %r8d # imm = 0x100000
-; SCALAR-NEXT: imulq %rcx, %r8
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $16, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $65536, %edi # imm = 0x10000
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $17, %rdx
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000
+; SCALAR-NEXT: cmovneq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $18, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $262144, %edi # imm = 0x40000
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $19, %rdx
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000
+; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
-; SCALAR-NEXT: movl %eax, %edx
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $20, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $1048576, %edi # imm = 0x100000
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $21, %r8
+; SCALAR-NEXT: movl %ecx, %edx
; SCALAR-NEXT: andl $2097152, %edx # imm = 0x200000
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: xorq %r8, %rdx
+; SCALAR-NEXT: cmovneq %r8, %rdx
+; SCALAR-NEXT: xorq %rdi, %rdx
; SCALAR-NEXT: xorq %rsi, %rdx
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $4194304, %esi # imm = 0x400000
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $8388608, %edi # imm = 0x800000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $16777216, %esi # imm = 0x1000000
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: xorq %rdi, %rsi
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $33554432, %edi # imm = 0x2000000
-; SCALAR-NEXT: imulq %rcx, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $22, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $4194304, %edi # imm = 0x400000
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $23, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $24, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $16777216, %edi # imm = 0x1000000
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $25, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $26, %rsi
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $67108864, %edi # imm = 0x4000000
+; SCALAR-NEXT: cmovneq %rsi, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $27, %rsi
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000
+; SCALAR-NEXT: cmovneq %rsi, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $28, %rdi
+; SCALAR-NEXT: movl %ecx, %esi
+; SCALAR-NEXT: andl $268435456, %esi # imm = 0x10000000
+; SCALAR-NEXT: cmovneq %rdi, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: xorq %rdx, %rsi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $29, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $536870912, %edi # imm = 0x20000000
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $30, %rdx
+; SCALAR-NEXT: movl %ecx, %r8d
+; SCALAR-NEXT: andl $1073741824, %r8d # imm = 0x40000000
+; SCALAR-NEXT: cmovneq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdx
+; SCALAR-NEXT: shlq $31, %rdx
+; SCALAR-NEXT: movl %ecx, %edi
+; SCALAR-NEXT: andl $-2147483648, %edi # imm = 0x80000000
+; SCALAR-NEXT: cmovneq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $32, %r8
+; SCALAR-NEXT: xorl %edx, %edx
+; SCALAR-NEXT: btq $32, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $33, %rdi
+; SCALAR-NEXT: btq $33, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $34, %r8
+; SCALAR-NEXT: btq $34, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %r9
+; SCALAR-NEXT: shlq $35, %r9
+; SCALAR-NEXT: btq $35, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r9
+; SCALAR-NEXT: xorq %r8, %r9
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $36, %rdi
+; SCALAR-NEXT: btq $36, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r9, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $67108864, %esi # imm = 0x4000000
-; SCALAR-NEXT: imulq %rcx, %rsi
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $37, %rsi
+; SCALAR-NEXT: btq $37, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $38, %r8
+; SCALAR-NEXT: btq $38, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $39, %rsi
+; SCALAR-NEXT: btq $39, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $40, %r8
+; SCALAR-NEXT: btq $40, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $41, %rsi
+; SCALAR-NEXT: btq $41, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $42, %r8
+; SCALAR-NEXT: btq $42, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $43, %rsi
+; SCALAR-NEXT: btq $43, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $44, %r8
+; SCALAR-NEXT: btq $44, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %r9
+; SCALAR-NEXT: shlq $45, %r9
+; SCALAR-NEXT: btq $45, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r9
+; SCALAR-NEXT: xorq %r8, %r9
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $46, %rsi
+; SCALAR-NEXT: btq $46, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r9, %rsi
; SCALAR-NEXT: xorq %rdi, %rsi
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $134217728, %edi # imm = 0x8000000
-; SCALAR-NEXT: imulq %rcx, %rdi
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $47, %rdi
+; SCALAR-NEXT: btq $47, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $48, %r8
+; SCALAR-NEXT: btq $48, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $49, %rdi
+; SCALAR-NEXT: btq $49, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $50, %r8
+; SCALAR-NEXT: btq $50, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $51, %rdi
+; SCALAR-NEXT: btq $51, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $52, %r8
+; SCALAR-NEXT: btq $52, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $53, %rdi
+; SCALAR-NEXT: btq $53, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $54, %r8
+; SCALAR-NEXT: btq $54, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $55, %rdi
+; SCALAR-NEXT: btq $55, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $56, %r8
+; SCALAR-NEXT: btq $56, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rdi, %r8
+; SCALAR-NEXT: movq %rax, %rdi
+; SCALAR-NEXT: shlq $57, %rdi
+; SCALAR-NEXT: btq $57, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
-; SCALAR-NEXT: movl %eax, %esi
-; SCALAR-NEXT: andl $268435456, %esi # imm = 0x10000000
-; SCALAR-NEXT: imulq %rcx, %rsi
-; SCALAR-NEXT: xorq %rdi, %rsi
-; SCALAR-NEXT: xorq %rdx, %rsi
-; SCALAR-NEXT: movl %eax, %edx
-; SCALAR-NEXT: andl $536870912, %edx # imm = 0x20000000
-; SCALAR-NEXT: imulq %rcx, %rdx
-; SCALAR-NEXT: movl %eax, %edi
-; SCALAR-NEXT: andl $1073741824, %edi # imm = 0x40000000
-; SCALAR-NEXT: imulq %rcx, %rdi
-; SCALAR-NEXT: xorq %rdx, %rdi
-; SCALAR-NEXT: andl $-2147483648, %eax # imm = 0x80000000
-; SCALAR-NEXT: imulq %rcx, %rax
-; SCALAR-NEXT: xorq %rdi, %rax
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $58, %rsi
+; SCALAR-NEXT: btq $58, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $59, %r8
+; SCALAR-NEXT: btq $59, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $60, %rsi
+; SCALAR-NEXT: btq $60, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: movq %rax, %r8
+; SCALAR-NEXT: shlq $61, %r8
+; SCALAR-NEXT: btq $61, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %r8
+; SCALAR-NEXT: xorq %rsi, %r8
+; SCALAR-NEXT: movq %rax, %rsi
+; SCALAR-NEXT: shlq $62, %rsi
+; SCALAR-NEXT: btq $62, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rsi
+; SCALAR-NEXT: xorq %r8, %rsi
+; SCALAR-NEXT: shlq $63, %rax
+; SCALAR-NEXT: btq $63, %rcx
+; SCALAR-NEXT: cmovaeq %rdx, %rax
; SCALAR-NEXT: xorq %rsi, %rax
+; SCALAR-NEXT: xorq %rdi, %rax
; SCALAR-NEXT: shrq $32, %rax
; SCALAR-NEXT: # kill: def $eax killed $eax killed $rax
; SCALAR-NEXT: retq
@@ -1735,375 +2675,411 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind {
define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
; SCALAR-LABEL: clmulh_i64:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: pushq %rbp
-; SCALAR-NEXT: pushq %r15
; SCALAR-NEXT: pushq %r14
-; SCALAR-NEXT: pushq %r13
-; SCALAR-NEXT: pushq %r12
; SCALAR-NEXT: pushq %rbx
-; SCALAR-NEXT: subq $40, %rsp
; SCALAR-NEXT: bswapq %rdi
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shrq $4, %rax
-; SCALAR-NEXT: movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F
-; SCALAR-NEXT: andq %r9, %rax
-; SCALAR-NEXT: andq %r9, %rdi
+; SCALAR-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
+; SCALAR-NEXT: andq %rdx, %rax
+; SCALAR-NEXT: andq %rdx, %rdi
; SCALAR-NEXT: shlq $4, %rdi
; SCALAR-NEXT: orq %rax, %rdi
-; SCALAR-NEXT: movabsq $3689348814741910323, %r8 # imm = 0x3333333333333333
+; SCALAR-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
; SCALAR-NEXT: movq %rdi, %rax
-; SCALAR-NEXT: andq %r8, %rax
+; SCALAR-NEXT: andq %rcx, %rax
; SCALAR-NEXT: shrq $2, %rdi
-; SCALAR-NEXT: andq %r8, %rdi
-; SCALAR-NEXT: leaq (%rdi,%rax,4), %rax
-; SCALAR-NEXT: movabsq $6148914691236517205, %rdx # imm = 0x5555555555555555
-; SCALAR-NEXT: movq %rax, %rcx
-; SCALAR-NEXT: andq %rdx, %rcx
-; SCALAR-NEXT: shrq %rax
-; SCALAR-NEXT: andq %rdx, %rax
-; SCALAR-NEXT: leaq (%rax,%rcx,2), %rdi
+; SCALAR-NEXT: andq %rcx, %rdi
+; SCALAR-NEXT: leaq (%rdi,%rax,4), %rdi
+; SCALAR-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
+; SCALAR-NEXT: movq %rdi, %r8
+; SCALAR-NEXT: andq %rax, %r8
+; SCALAR-NEXT: shrq %rdi
+; SCALAR-NEXT: movq %rdi, %r9
+; SCALAR-NEXT: andq %rax, %r9
+; SCALAR-NEXT: leaq (%r9,%r8,2), %r8
+; SCALAR-NEXT: leaq (%r8,%r8), %r9
; SCALAR-NEXT: bswapq %rsi
-; SCALAR-NEXT: movq %rsi, %rax
-; SCALAR-NEXT: shrq $4, %rax
-; SCALAR-NEXT: andq %r9, %rax
-; SCALAR-NEXT: andq %r9, %rsi
+; SCALAR-NEXT: movq %rsi, %r10
+; SCALAR-NEXT: shrq $4, %r10
+; SCALAR-NEXT: andq %rdx, %r10
+; SCALAR-NEXT: andq %rdx, %rsi
; SCALAR-NEXT: shlq $4, %rsi
-; SCALAR-NEXT: orq %rax, %rsi
-; SCALAR-NEXT: movq %rsi, %rax
-; SCALAR-NEXT: andq %r8, %rax
+; SCALAR-NEXT: orq %r10, %rsi
+; SCALAR-NEXT: movq %rsi, %r10
+; SCALAR-NEXT: andq %rcx, %r10
; SCALAR-NEXT: shrq $2, %rsi
-; SCALAR-NEXT: andq %r8, %rsi
-; SCALAR-NEXT: leaq (%rsi,%rax,4), %rax
-; SCALAR-NEXT: movq %rax, %rcx
-; SCALAR-NEXT: andq %rdx, %rcx
-; SCALAR-NEXT: shrq %rax
-; SCALAR-NEXT: andq %rdx, %rax
-; SCALAR-NEXT: leaq (%rax,%rcx,2), %rsi
+; SCALAR-NEXT: andq %rcx, %rsi
+; SCALAR-NEXT: leaq (%rsi,%r10,4), %rsi
+; SCALAR-NEXT: movq %rsi, %r10
+; SCALAR-NEXT: andq %rax, %r10
+; SCALAR-NEXT: shrq %rsi
+; SCALAR-NEXT: andq %rax, %rsi
+; SCALAR-NEXT: leaq (%rsi,%r10,2), %rsi
; SCALAR-NEXT: movl %esi, %r10d
-; SCALAR-NEXT: movl %esi, %r15d
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: movl %esi, %r14d
+; SCALAR-NEXT: andl $2, %r10d
+; SCALAR-NEXT: cmovneq %r9, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $1, %r11d
+; SCALAR-NEXT: cmovneq %r8, %r11
+; SCALAR-NEXT: xorq %r10, %r11
+; SCALAR-NEXT: leaq (,%r8,4), %r9
+; SCALAR-NEXT: movl %esi, %r10d
+; SCALAR-NEXT: andl $4, %r10d
+; SCALAR-NEXT: cmovneq %r9, %r10
+; SCALAR-NEXT: leaq (,%r8,8), %rbx
+; SCALAR-NEXT: movl %esi, %r9d
+; SCALAR-NEXT: andl $8, %r9d
+; SCALAR-NEXT: cmovneq %rbx, %r9
+; SCALAR-NEXT: xorq %r10, %r9
+; SCALAR-NEXT: xorq %r11, %r9
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $4, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $16, %r11d
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $5, %r10
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $32, %ebx
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $6, %r11
+; SCALAR-NEXT: movl %esi, %r10d
+; SCALAR-NEXT: andl $64, %r10d
+; SCALAR-NEXT: cmovneq %r11, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
+; SCALAR-NEXT: xorq %r9, %r10
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $7, %r9
; SCALAR-NEXT: movl %esi, %r11d
-; SCALAR-NEXT: movl %esi, %eax
+; SCALAR-NEXT: andl $128, %r11d
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $8, %r9
; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $256, %ebx # imm = 0x100
+; SCALAR-NEXT: cmovneq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $9, %r9
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $512, %r11d # imm = 0x200
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $10, %rbx
; SCALAR-NEXT: movl %esi, %r9d
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $2, %edx
-; SCALAR-NEXT: imulq %rdi, %rdx
-; SCALAR-NEXT: andl $1, %r10d
-; SCALAR-NEXT: imulq %rdi, %r10
-; SCALAR-NEXT: xorq %rdx, %r10
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $4, %r15d
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: andl $8, %ecx
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: xorq %r15, %rcx
-; SCALAR-NEXT: movl %esi, %r13d
-; SCALAR-NEXT: xorq %r10, %rcx
-; SCALAR-NEXT: movl %esi, %r10d
-; SCALAR-NEXT: andl $16, %r14d
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: andl $32, %r11d
-; SCALAR-NEXT: imulq %rdi, %r11
-; SCALAR-NEXT: xorq %r14, %r11
-; SCALAR-NEXT: movl %esi, %r14d
-; SCALAR-NEXT: andl $64, %eax
-; SCALAR-NEXT: imulq %rdi, %rax
-; SCALAR-NEXT: xorq %r11, %rax
+; SCALAR-NEXT: andl $1024, %r9d # imm = 0x400
+; SCALAR-NEXT: cmovneq %rbx, %r9
+; SCALAR-NEXT: xorq %r11, %r9
+; SCALAR-NEXT: xorq %r10, %r9
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $11, %r10
; SCALAR-NEXT: movl %esi, %r11d
-; SCALAR-NEXT: xorq %rcx, %rax
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $128, %ebx
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: andl $256, %r9d # imm = 0x100
-; SCALAR-NEXT: imulq %rdi, %r9
-; SCALAR-NEXT: xorq %rbx, %r9
-; SCALAR-NEXT: movl %esi, %ebp
-; SCALAR-NEXT: andl $512, %r8d # imm = 0x200
-; SCALAR-NEXT: imulq %rdi, %r8
-; SCALAR-NEXT: xorq %r9, %r8
-; SCALAR-NEXT: movl %esi, %r15d
-; SCALAR-NEXT: andl $1024, %edx # imm = 0x400
-; SCALAR-NEXT: imulq %rdi, %rdx
-; SCALAR-NEXT: xorq %r8, %rdx
-; SCALAR-NEXT: movl %esi, %r12d
-; SCALAR-NEXT: xorq %rax, %rdx
+; SCALAR-NEXT: andl $2048, %r11d # imm = 0x800
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $12, %r10
; SCALAR-NEXT: movl %esi, %ebx
-; SCALAR-NEXT: andl $2048, %r13d # imm = 0x800
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: andl $4096, %r10d # imm = 0x1000
-; SCALAR-NEXT: imulq %rdi, %r10
-; SCALAR-NEXT: xorq %r13, %r10
-; SCALAR-NEXT: movl %esi, %r13d
-; SCALAR-NEXT: andl $8192, %r14d # imm = 0x2000
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: xorq %r10, %r14
-; SCALAR-NEXT: movl %esi, %eax
-; SCALAR-NEXT: andl $16384, %r11d # imm = 0x4000
-; SCALAR-NEXT: imulq %rdi, %r11
-; SCALAR-NEXT: xorq %r14, %r11
-; SCALAR-NEXT: movl %esi, %r14d
-; SCALAR-NEXT: andl $32768, %ecx # imm = 0x8000
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: xorq %r11, %rcx
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: xorq %rdx, %rcx
-; SCALAR-NEXT: movl %esi, %edx
-; SCALAR-NEXT: andl $65536, %ebp # imm = 0x10000
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: andl $131072, %r15d # imm = 0x20000
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: xorq %rbp, %r15
-; SCALAR-NEXT: movl %esi, %r9d
-; SCALAR-NEXT: andl $262144, %r12d # imm = 0x40000
-; SCALAR-NEXT: imulq %rdi, %r12
-; SCALAR-NEXT: xorq %r15, %r12
+; SCALAR-NEXT: andl $4096, %ebx # imm = 0x1000
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $13, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $8192, %r11d # imm = 0x2000
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $14, %r10
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $16384, %ebx # imm = 0x4000
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $15, %r11
; SCALAR-NEXT: movl %esi, %r10d
+; SCALAR-NEXT: andl $32768, %r10d # imm = 0x8000
+; SCALAR-NEXT: cmovneq %r11, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
+; SCALAR-NEXT: xorq %r9, %r10
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $16, %r9
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $65536, %r11d # imm = 0x10000
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $17, %r9
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $131072, %ebx # imm = 0x20000
+; SCALAR-NEXT: cmovneq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $18, %r9
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $262144, %r11d # imm = 0x40000
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $19, %r9
+; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $524288, %ebx # imm = 0x80000
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: xorq %r12, %rbx
+; SCALAR-NEXT: cmovneq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $20, %r9
; SCALAR-NEXT: movl %esi, %r11d
-; SCALAR-NEXT: andl $1048576, %r13d # imm = 0x100000
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbx, %r13
-; SCALAR-NEXT: movl %esi, %r15d
-; SCALAR-NEXT: andl $2097152, %eax # imm = 0x200000
-; SCALAR-NEXT: imulq %rdi, %rax
-; SCALAR-NEXT: xorq %r13, %rax
+; SCALAR-NEXT: andl $1048576, %r11d # imm = 0x100000
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $21, %rbx
+; SCALAR-NEXT: movl %esi, %r9d
+; SCALAR-NEXT: andl $2097152, %r9d # imm = 0x200000
+; SCALAR-NEXT: cmovneq %rbx, %r9
+; SCALAR-NEXT: xorq %r11, %r9
+; SCALAR-NEXT: xorq %r10, %r9
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $22, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $4194304, %r11d # imm = 0x400000
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $23, %r10
; SCALAR-NEXT: movl %esi, %ebx
-; SCALAR-NEXT: xorq %rcx, %rax
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andl $4194304, %r14d # imm = 0x400000
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000
-; SCALAR-NEXT: imulq %rdi, %r8
-; SCALAR-NEXT: xorq %r14, %r8
-; SCALAR-NEXT: movabsq $4294967296, %rbp # imm = 0x100000000
-; SCALAR-NEXT: andq %rsi, %rbp
-; SCALAR-NEXT: andl $16777216, %edx # imm = 0x1000000
-; SCALAR-NEXT: imulq %rdi, %rdx
-; SCALAR-NEXT: xorq %r8, %rdx
-; SCALAR-NEXT: movabsq $8589934592, %r8 # imm = 0x200000000
-; SCALAR-NEXT: andq %rsi, %r8
-; SCALAR-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $33554432, %r9d # imm = 0x2000000
-; SCALAR-NEXT: imulq %rdi, %r9
-; SCALAR-NEXT: xorq %rdx, %r9
-; SCALAR-NEXT: movabsq $17179869184, %rdx # imm = 0x400000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $67108864, %r10d # imm = 0x4000000
-; SCALAR-NEXT: imulq %rdi, %r10
+; SCALAR-NEXT: andl $8388608, %ebx # imm = 0x800000
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $24, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $16777216, %r11d # imm = 0x1000000
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $25, %r10
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $33554432, %ebx # imm = 0x2000000
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $26, %r10
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $67108864, %r11d # imm = 0x4000000
+; SCALAR-NEXT: cmovneq %r10, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $27, %r10
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $134217728, %ebx # imm = 0x8000000
+; SCALAR-NEXT: cmovneq %r10, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $28, %r11
+; SCALAR-NEXT: movl %esi, %r10d
+; SCALAR-NEXT: andl $268435456, %r10d # imm = 0x10000000
+; SCALAR-NEXT: cmovneq %r11, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: xorq %r9, %r10
-; SCALAR-NEXT: movabsq $34359738368, %rdx # imm = 0x800000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $134217728, %r11d # imm = 0x8000000
-; SCALAR-NEXT: imulq %rdi, %r11
-; SCALAR-NEXT: xorq %r10, %r11
-; SCALAR-NEXT: movabsq $68719476736, %rdx # imm = 0x1000000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: andl $268435456, %r15d # imm = 0x10000000
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: xorq %r11, %r15
-; SCALAR-NEXT: movabsq $137438953472, %rdx # imm = 0x2000000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movq %rdx, (%rsp) # 8-byte Spill
-; SCALAR-NEXT: xorq %rax, %r15
-; SCALAR-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $274877906944, %r13 # imm = 0x4000000000
-; SCALAR-NEXT: andq %rsi, %r13
-; SCALAR-NEXT: andl $536870912, %ebx # imm = 0x20000000
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: andl $1073741824, %ecx # imm = 0x40000000
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: xorq %rbx, %rcx
-; SCALAR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $549755813888, %rax # imm = 0x8000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $1099511627776, %rax # imm = 0x10000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $2199023255552, %rax # imm = 0x20000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $4398046511104, %rax # imm = 0x40000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $8796093022208, %rax # imm = 0x80000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $17592186044416, %rax # imm = 0x100000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $35184372088832, %rax # imm = 0x200000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $70368744177664, %rax # imm = 0x400000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $140737488355328, %rax # imm = 0x800000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $281474976710656, %rax # imm = 0x1000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $562949953421312, %rax # imm = 0x2000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $1125899906842624, %rax # imm = 0x4000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $2251799813685248, %rax # imm = 0x8000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $4503599627370496, %r12 # imm = 0x10000000000000
-; SCALAR-NEXT: andq %rsi, %r12
-; SCALAR-NEXT: movabsq $9007199254740992, %r15 # imm = 0x20000000000000
-; SCALAR-NEXT: andq %rsi, %r15
-; SCALAR-NEXT: movabsq $18014398509481984, %r14 # imm = 0x40000000000000
-; SCALAR-NEXT: andq %rsi, %r14
-; SCALAR-NEXT: movabsq $36028797018963968, %r11 # imm = 0x80000000000000
-; SCALAR-NEXT: andq %rsi, %r11
-; SCALAR-NEXT: movabsq $72057594037927936, %rax # imm = 0x100000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SCALAR-NEXT: movabsq $144115188075855872, %rbx # imm = 0x200000000000000
-; SCALAR-NEXT: andq %rsi, %rbx
-; SCALAR-NEXT: movabsq $288230376151711744, %r10 # imm = 0x400000000000000
-; SCALAR-NEXT: andq %rsi, %r10
-; SCALAR-NEXT: movabsq $576460752303423488, %r9 # imm = 0x800000000000000
-; SCALAR-NEXT: andq %rsi, %r9
-; SCALAR-NEXT: movabsq $1152921504606846976, %r8 # imm = 0x1000000000000000
-; SCALAR-NEXT: andq %rsi, %r8
-; SCALAR-NEXT: movabsq $2305843009213693952, %rdx # imm = 0x2000000000000000
-; SCALAR-NEXT: andq %rsi, %rdx
-; SCALAR-NEXT: movabsq $4611686018427387904, %rcx # imm = 0x4000000000000000
-; SCALAR-NEXT: andq %rsi, %rcx
-; SCALAR-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SCALAR-NEXT: andq %rsi, %rax
-; SCALAR-NEXT: # kill: def $esi killed $esi killed $rsi def $rsi
-; SCALAR-NEXT: andl $-2147483648, %esi # imm = 0x80000000
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; SCALAR-NEXT: movq (%rsp), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: movq %r13, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %r13
-; SCALAR-NEXT: xorq %rbp, %r13
-; SCALAR-NEXT: xorq %rsi, %r13
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: xorq %rbp, %rsi
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rbp
-; SCALAR-NEXT: xorq %rsi, %rbp
-; SCALAR-NEXT: imulq %rdi, %r12
-; SCALAR-NEXT: xorq %rbp, %r12
-; SCALAR-NEXT: imulq %rdi, %r15
-; SCALAR-NEXT: xorq %r12, %r15
-; SCALAR-NEXT: imulq %rdi, %r14
-; SCALAR-NEXT: xorq %r15, %r14
-; SCALAR-NEXT: imulq %rdi, %r11
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $29, %r9
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $536870912, %r11d # imm = 0x20000000
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $30, %r9
+; SCALAR-NEXT: movl %esi, %ebx
+; SCALAR-NEXT: andl $1073741824, %ebx # imm = 0x40000000
+; SCALAR-NEXT: cmovneq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r9
+; SCALAR-NEXT: shlq $31, %r9
+; SCALAR-NEXT: movl %esi, %r11d
+; SCALAR-NEXT: andl $-2147483648, %r11d # imm = 0x80000000
+; SCALAR-NEXT: cmovneq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $32, %rbx
+; SCALAR-NEXT: xorl %r9d, %r9d
+; SCALAR-NEXT: btq $32, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $33, %r11
+; SCALAR-NEXT: btq $33, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $34, %rbx
+; SCALAR-NEXT: btq $34, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r14
+; SCALAR-NEXT: shlq $35, %r14
+; SCALAR-NEXT: btq $35, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r14
+; SCALAR-NEXT: xorq %rbx, %r14
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $36, %r11
+; SCALAR-NEXT: btq $36, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %r14, %r11
-; SCALAR-NEXT: xorq %r13, %r11
-; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; SCALAR-NEXT: imulq %rdi, %rsi
-; SCALAR-NEXT: imulq %rdi, %rbx
-; SCALAR-NEXT: xorq %rsi, %rbx
-; SCALAR-NEXT: imulq %rdi, %r10
+; SCALAR-NEXT: xorq %r10, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $37, %r10
+; SCALAR-NEXT: btq $37, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $38, %rbx
+; SCALAR-NEXT: btq $38, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $39, %r10
+; SCALAR-NEXT: btq $39, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %rbx, %r10
-; SCALAR-NEXT: imulq %rdi, %r9
-; SCALAR-NEXT: xorq %r10, %r9
-; SCALAR-NEXT: imulq %rdi, %r8
-; SCALAR-NEXT: xorq %r9, %r8
-; SCALAR-NEXT: imulq %rdi, %rdx
-; SCALAR-NEXT: xorq %r8, %rdx
-; SCALAR-NEXT: imulq %rdi, %rcx
-; SCALAR-NEXT: xorq %rdx, %rcx
-; SCALAR-NEXT: imulq %rdi, %rax
-; SCALAR-NEXT: xorq %rcx, %rax
-; SCALAR-NEXT: xorq %r11, %rax
-; SCALAR-NEXT: bswapq %rax
-; SCALAR-NEXT: movq %rax, %rcx
-; SCALAR-NEXT: shrq $4, %rcx
-; SCALAR-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
-; SCALAR-NEXT: andq %rdx, %rcx
-; SCALAR-NEXT: andq %rdx, %rax
-; SCALAR-NEXT: shlq $4, %rax
-; SCALAR-NEXT: orq %rcx, %rax
-; SCALAR-NEXT: movq %rax, %rcx
-; SCALAR-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
-; SCALAR-NEXT: andq %rdx, %rcx
-; SCALAR-NEXT: shrq $2, %rax
-; SCALAR-NEXT: andq %rdx, %rax
-; SCALAR-NEXT: leaq (%rax,%rcx,4), %rax
-; SCALAR-NEXT: movabsq $6148914691236517205, %rdx # imm = 0x5555555555555555
-; SCALAR-NEXT: andq %rax, %rdx
-; SCALAR-NEXT: shrq %rax
-; SCALAR-NEXT: movabsq $6148914691236517204, %rcx # imm = 0x5555555555555554
-; SCALAR-NEXT: andq %rax, %rcx
-; SCALAR-NEXT: leaq (%rcx,%rdx,2), %rax
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $40, %rbx
+; SCALAR-NEXT: btq $40, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $41, %r10
+; SCALAR-NEXT: btq $41, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $42, %rbx
+; SCALAR-NEXT: btq $42, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $43, %r10
+; SCALAR-NEXT: btq $43, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $44, %rbx
+; SCALAR-NEXT: btq $44, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: movq %r8, %r14
+; SCALAR-NEXT: shlq $45, %r14
+; SCALAR-NEXT: btq $45, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r14
+; SCALAR-NEXT: xorq %rbx, %r14
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $46, %r10
+; SCALAR-NEXT: btq $46, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: xorq %r14, %r10
+; SCALAR-NEXT: xorq %r11, %r10
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $47, %r11
+; SCALAR-NEXT: btq $47, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $48, %rbx
+; SCALAR-NEXT: btq $48, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $49, %r11
+; SCALAR-NEXT: btq $49, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $50, %rbx
+; SCALAR-NEXT: btq $50, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $51, %r11
+; SCALAR-NEXT: btq $51, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $52, %rbx
+; SCALAR-NEXT: btq $52, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $53, %r11
+; SCALAR-NEXT: btq $53, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $54, %rbx
+; SCALAR-NEXT: btq $54, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $55, %r11
+; SCALAR-NEXT: btq $55, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $56, %rbx
+; SCALAR-NEXT: btq $56, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r11, %rbx
+; SCALAR-NEXT: movq %r8, %r11
+; SCALAR-NEXT: shlq $57, %r11
+; SCALAR-NEXT: btq $57, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r11
+; SCALAR-NEXT: xorq %rbx, %r11
+; SCALAR-NEXT: xorq %r10, %r11
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $58, %r10
+; SCALAR-NEXT: btq $58, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $59, %rbx
+; SCALAR-NEXT: btq $59, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: movq %r8, %r10
+; SCALAR-NEXT: shlq $60, %r10
+; SCALAR-NEXT: btq $60, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r10
+; SCALAR-NEXT: xorq %rbx, %r10
+; SCALAR-NEXT: movq %r8, %rbx
+; SCALAR-NEXT: shlq $61, %rbx
+; SCALAR-NEXT: btq $61, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rbx
+; SCALAR-NEXT: xorq %r10, %rbx
+; SCALAR-NEXT: shlq $62, %r8
+; SCALAR-NEXT: btq $62, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %r8
+; SCALAR-NEXT: xorq %rbx, %r8
+; SCALAR-NEXT: shlq $63, %rdi
+; SCALAR-NEXT: btq $63, %rsi
+; SCALAR-NEXT: cmovaeq %r9, %rdi
+; SCALAR-NEXT: xorq %r8, %rdi
+; SCALAR-NEXT: xorq %r11, %rdi
+; SCALAR-NEXT: bswapq %rdi
+; SCALAR-NEXT: movq %rdi, %rsi
+; SCALAR-NEXT: shrq $4, %rsi
+; SCALAR-NEXT: andq %rdx, %rsi
+; SCALAR-NEXT: andq %rdx, %rdi
+; SCALAR-NEXT: shlq $4, %rdi
+; SCALAR-NEXT: orq %rsi, %rdi
+; SCALAR-NEXT: movq %rdi, %rdx
+; SCALAR-NEXT: andq %rcx, %rdx
+; SCALAR-NEXT: shrq $2, %rdi
+; SCALAR-NEXT: andq %rcx, %rdi
+; SCALAR-NEXT: leaq (%rdi,%rdx,4), %rcx
+; SCALAR-NEXT: andq %rcx, %rax
+; SCALAR-NEXT: shrq %rcx
+; SCALAR-NEXT: movabsq $6148914691236517204, %rdx # imm = 0x5555555555555554
+; SCALAR-NEXT: andq %rcx, %rdx
+; SCALAR-NEXT: leaq (%rdx,%rax,2), %rax
; SCALAR-NEXT: shrq %rax
-; SCALAR-NEXT: addq $40, %rsp
; SCALAR-NEXT: popq %rbx
-; SCALAR-NEXT: popq %r12
-; SCALAR-NEXT: popq %r13
; SCALAR-NEXT: popq %r14
-; SCALAR-NEXT: popq %r15
-; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE2-PCLMUL-LABEL: clmulh_i64:
@@ -2141,51 +3117,50 @@ define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
define i8 @clmul_i8_noimplicitfloat(i8 %a, i8 %b) nounwind noimplicitfloat {
; CHECK-LABEL: clmul_i8_noimplicitfloat:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %esi, %edx
-; CHECK-NEXT: andb $1, %dl
-; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: andb $2, %cl
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: mulb %cl
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: mulb %dl
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: xorb %cl, %dl
-; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: andb $4, %cl
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testb $1, %sil
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: mulb %cl
-; CHECK-NEXT: movl %eax, %r8d
-; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: andb $8, %cl
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: mulb %cl
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: xorb %r8b, %cl
-; CHECK-NEXT: xorb %dl, %cl
-; CHECK-NEXT: movl %esi, %edx
-; CHECK-NEXT: andb $16, %dl
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: mulb %dl
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: movl %esi, %r8d
-; CHECK-NEXT: andb $32, %r8b
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: mulb %r8b
-; CHECK-NEXT: movl %eax, %r8d
-; CHECK-NEXT: movl %esi, %r9d
-; CHECK-NEXT: andb $64, %r9b
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: mulb %r9b
-; CHECK-NEXT: movl %eax, %r9d
-; CHECK-NEXT: xorb %dl, %r8b
-; CHECK-NEXT: xorb %r8b, %r9b
-; CHECK-NEXT: xorb %cl, %r9b
-; CHECK-NEXT: andb $-128, %sil
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: mulb %sil
-; CHECK-NEXT: xorb %r9b, %al
+; CHECK-NEXT: cmovel %ecx, %eax
+; CHECK-NEXT: leal (%rdi,%rdi), %edx
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: testb $2, %sil
+; CHECK-NEXT: cmovel %ecx, %edx
+; CHECK-NEXT: xorl %eax, %edx
+; CHECK-NEXT: leal (,%rdi,4), %eax
+; CHECK-NEXT: movzbl %al, %r8d
+; CHECK-NEXT: testb $4, %sil
+; CHECK-NEXT: cmovel %ecx, %r8d
+; CHECK-NEXT: leal (,%rdi,8), %eax
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: testb $8, %sil
+; CHECK-NEXT: cmovel %ecx, %eax
+; CHECK-NEXT: xorl %r8d, %eax
+; CHECK-NEXT: xorl %edx, %eax
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: shlb $4, %dl
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: testb $16, %sil
+; CHECK-NEXT: cmovel %ecx, %edx
+; CHECK-NEXT: movl %edi, %r8d
+; CHECK-NEXT: shlb $5, %r8b
+; CHECK-NEXT: movzbl %r8b, %r8d
+; CHECK-NEXT: testb $32, %sil
+; CHECK-NEXT: cmovel %ecx, %r8d
+; CHECK-NEXT: xorl %edx, %r8d
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: shlb $6, %dl
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: testb $64, %sil
+; CHECK-NEXT: cmovel %ecx, %edx
+; CHECK-NEXT: xorl %r8d, %edx
+; CHECK-NEXT: xorl %eax, %edx
+; CHECK-NEXT: shlb $7, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: testb $-128, %sil
+; CHECK-NEXT: cmovel %ecx, %eax
+; CHECK-NEXT: xorl %edx, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%res = call i8 @llvm.clmul.i8(i8 %a, i8 %b)
ret i8 %res
@@ -2196,57 +3171,51 @@ declare void @use(i8)
define void @commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; SCALAR-LABEL: commutative_clmul_i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: pushq %rbp
-; SCALAR-NEXT: pushq %rbx
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andb $1, %r8b
-; SCALAR-NEXT: movl %esi, %r9d
-; SCALAR-NEXT: andb $2, %r9b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r9b
-; SCALAR-NEXT: movl %eax, %r9d
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r8b
-; SCALAR-NEXT: movl %eax, %r8d
-; SCALAR-NEXT: xorb %r9b, %r8b
-; SCALAR-NEXT: movl %esi, %r9d
-; SCALAR-NEXT: andb $4, %r9b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r9b
-; SCALAR-NEXT: movl %eax, %r9d
-; SCALAR-NEXT: movl %esi, %r10d
-; SCALAR-NEXT: andb $8, %r10b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r10b
-; SCALAR-NEXT: movl %eax, %r10d
-; SCALAR-NEXT: movl %esi, %r11d
-; SCALAR-NEXT: andb $16, %r11b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r11b
-; SCALAR-NEXT: movl %eax, %r11d
-; SCALAR-NEXT: movl %esi, %ebx
-; SCALAR-NEXT: andb $32, %bl
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %bl
-; SCALAR-NEXT: movl %eax, %ebx
-; SCALAR-NEXT: movl %esi, %ebp
-; SCALAR-NEXT: andb $64, %bpl
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %bpl
-; SCALAR-NEXT: movl %eax, %ebp
-; SCALAR-NEXT: xorb %r9b, %r10b
-; SCALAR-NEXT: xorb %r8b, %r10b
-; SCALAR-NEXT: xorb %r11b, %bl
-; SCALAR-NEXT: xorb %bl, %bpl
-; SCALAR-NEXT: xorb %r10b, %bpl
-; SCALAR-NEXT: andb $-128, %sil
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %sil
-; SCALAR-NEXT: xorb %bpl, %al
-; SCALAR-NEXT: movb %al, (%rdx)
-; SCALAR-NEXT: movb %al, (%rcx)
-; SCALAR-NEXT: popq %rbx
-; SCALAR-NEXT: popq %rbp
+; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi
+; SCALAR-NEXT: xorl %eax, %eax
+; SCALAR-NEXT: testb $1, %sil
+; SCALAR-NEXT: movl %edi, %r8d
+; SCALAR-NEXT: cmovel %eax, %r8d
+; SCALAR-NEXT: leal (%rdi,%rdi), %r9d
+; SCALAR-NEXT: movzbl %r9b, %r9d
+; SCALAR-NEXT: testb $2, %sil
+; SCALAR-NEXT: cmovel %eax, %r9d
+; SCALAR-NEXT: xorl %r8d, %r9d
+; SCALAR-NEXT: leal (,%rdi,4), %r8d
+; SCALAR-NEXT: movzbl %r8b, %r10d
+; SCALAR-NEXT: testb $4, %sil
+; SCALAR-NEXT: cmovel %eax, %r10d
+; SCALAR-NEXT: leal (,%rdi,8), %r8d
+; SCALAR-NEXT: movzbl %r8b, %r8d
+; SCALAR-NEXT: testb $8, %sil
+; SCALAR-NEXT: cmovel %eax, %r8d
+; SCALAR-NEXT: xorl %r10d, %r8d
+; SCALAR-NEXT: xorl %r9d, %r8d
+; SCALAR-NEXT: movl %edi, %r9d
+; SCALAR-NEXT: shlb $4, %r9b
+; SCALAR-NEXT: movzbl %r9b, %r9d
+; SCALAR-NEXT: testb $16, %sil
+; SCALAR-NEXT: cmovel %eax, %r9d
+; SCALAR-NEXT: movl %edi, %r10d
+; SCALAR-NEXT: shlb $5, %r10b
+; SCALAR-NEXT: movzbl %r10b, %r10d
+; SCALAR-NEXT: testb $32, %sil
+; SCALAR-NEXT: cmovel %eax, %r10d
+; SCALAR-NEXT: xorl %r9d, %r10d
+; SCALAR-NEXT: movl %edi, %r9d
+; SCALAR-NEXT: shlb $6, %r9b
+; SCALAR-NEXT: movzbl %r9b, %r9d
+; SCALAR-NEXT: testb $64, %sil
+; SCALAR-NEXT: cmovel %eax, %r9d
+; SCALAR-NEXT: xorl %r10d, %r9d
+; SCALAR-NEXT: xorl %r8d, %r9d
+; SCALAR-NEXT: shlb $7, %dil
+; SCALAR-NEXT: movzbl %dil, %edi
+; SCALAR-NEXT: testb $-128, %sil
+; SCALAR-NEXT: cmovel %eax, %edi
+; SCALAR-NEXT: xorl %r9d, %edi
+; SCALAR-NEXT: movb %dil, (%rdx)
+; SCALAR-NEXT: movb %dil, (%rcx)
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: commutative_clmul_i8:
@@ -2278,40 +3247,98 @@ define void @commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
define void @commutative_clmulh_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; SCALAR-LABEL: commutative_clmulh_i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %edi, %r8d
-; SCALAR-NEXT: andl $2, %r8d
-; SCALAR-NEXT: movzbl %sil, %eax
-; SCALAR-NEXT: imull %eax, %r8d
-; SCALAR-NEXT: movl %edi, %esi
-; SCALAR-NEXT: andl $1, %esi
-; SCALAR-NEXT: imull %eax, %esi
-; SCALAR-NEXT: xorl %r8d, %esi
-; SCALAR-NEXT: movl %edi, %r8d
-; SCALAR-NEXT: andl $4, %r8d
-; SCALAR-NEXT: imull %eax, %r8d
-; SCALAR-NEXT: movl %edi, %r9d
-; SCALAR-NEXT: andl $8, %r9d
-; SCALAR-NEXT: imull %eax, %r9d
-; SCALAR-NEXT: xorl %r8d, %r9d
-; SCALAR-NEXT: xorl %esi, %r9d
-; SCALAR-NEXT: movl %edi, %esi
-; SCALAR-NEXT: andl $16, %esi
-; SCALAR-NEXT: imull %eax, %esi
-; SCALAR-NEXT: movl %edi, %r8d
-; SCALAR-NEXT: andl $32, %r8d
-; SCALAR-NEXT: imull %eax, %r8d
-; SCALAR-NEXT: xorl %esi, %r8d
-; SCALAR-NEXT: movl %edi, %esi
-; SCALAR-NEXT: andl $64, %esi
-; SCALAR-NEXT: imull %eax, %esi
-; SCALAR-NEXT: xorl %r8d, %esi
-; SCALAR-NEXT: xorl %r9d, %esi
+; SCALAR-NEXT: pushq %rbp
+; SCALAR-NEXT: pushq %r15
+; SCALAR-NEXT: pushq %r14
+; SCALAR-NEXT: pushq %r13
+; SCALAR-NEXT: pushq %r12
+; SCALAR-NEXT: pushq %rbx
+; SCALAR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SCALAR-NEXT: movzbl %sil, %r14d
+; SCALAR-NEXT: movl %r14d, %ebp
+; SCALAR-NEXT: shll $8, %ebp
+; SCALAR-NEXT: movl %r14d, %ebx
+; SCALAR-NEXT: shll $9, %ebx
+; SCALAR-NEXT: movl %r14d, %r11d
+; SCALAR-NEXT: shll $10, %r11d
+; SCALAR-NEXT: movl %r14d, %eax
+; SCALAR-NEXT: shll $11, %eax
+; SCALAR-NEXT: movl %r14d, %r10d
+; SCALAR-NEXT: shll $12, %r10d
+; SCALAR-NEXT: movl %r14d, %ecx
+; SCALAR-NEXT: shll $13, %ecx
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: shll $14, %r8d
+; SCALAR-NEXT: xorl %r15d, %r15d
+; SCALAR-NEXT: testw %r15w, %r15w
+; SCALAR-NEXT: cmovel %r15d, %r8d
+; SCALAR-NEXT: cmovel %r15d, %ecx
+; SCALAR-NEXT: cmovel %r15d, %r10d
+; SCALAR-NEXT: cmovel %r15d, %eax
+; SCALAR-NEXT: cmovel %r15d, %r11d
+; SCALAR-NEXT: cmovel %r15d, %ebx
+; SCALAR-NEXT: cmovel %r15d, %ebp
+; SCALAR-NEXT: shll $15, %esi
+; SCALAR-NEXT: testw %r15w, %r15w
+; SCALAR-NEXT: cmovel %r15d, %esi
+; SCALAR-NEXT: movl %edi, %r15d
+; SCALAR-NEXT: andl $1, %r15d
+; SCALAR-NEXT: cmovnel %r14d, %r15d
+; SCALAR-NEXT: leal (%r14,%r14), %r12d
+; SCALAR-NEXT: movl %edi, %r13d
+; SCALAR-NEXT: andl $2, %r13d
+; SCALAR-NEXT: cmovnel %r12d, %r13d
+; SCALAR-NEXT: xorl %r15d, %r13d
+; SCALAR-NEXT: leal (,%r14,4), %r15d
+; SCALAR-NEXT: movl %edi, %r12d
+; SCALAR-NEXT: andl $4, %r12d
+; SCALAR-NEXT: cmovnel %r15d, %r12d
+; SCALAR-NEXT: movl %edi, %r15d
+; SCALAR-NEXT: andl $8, %r15d
+; SCALAR-NEXT: leal (,%r14,8), %r9d
+; SCALAR-NEXT: cmovnel %r9d, %r15d
+; SCALAR-NEXT: xorl %r12d, %r15d
+; SCALAR-NEXT: xorl %r13d, %r15d
+; SCALAR-NEXT: movl %r14d, %r9d
+; SCALAR-NEXT: shll $4, %r9d
+; SCALAR-NEXT: movl %edi, %r12d
+; SCALAR-NEXT: andl $16, %r12d
+; SCALAR-NEXT: cmovnel %r9d, %r12d
+; SCALAR-NEXT: movl %r14d, %r9d
+; SCALAR-NEXT: shll $5, %r9d
+; SCALAR-NEXT: movl %edi, %r13d
+; SCALAR-NEXT: andl $32, %r13d
+; SCALAR-NEXT: cmovnel %r9d, %r13d
+; SCALAR-NEXT: xorl %r12d, %r13d
+; SCALAR-NEXT: movl %r14d, %r9d
+; SCALAR-NEXT: shll $6, %r9d
+; SCALAR-NEXT: movl %edi, %r12d
+; SCALAR-NEXT: andl $64, %r12d
+; SCALAR-NEXT: cmovnel %r9d, %r12d
+; SCALAR-NEXT: xorl %r13d, %r12d
+; SCALAR-NEXT: xorl %r15d, %r12d
+; SCALAR-NEXT: shll $7, %r14d
; SCALAR-NEXT: andl $128, %edi
-; SCALAR-NEXT: imull %eax, %edi
-; SCALAR-NEXT: xorl %esi, %edi
-; SCALAR-NEXT: shrl $8, %edi
-; SCALAR-NEXT: movb %dil, (%rdx)
-; SCALAR-NEXT: movb %dil, (%rcx)
+; SCALAR-NEXT: cmovnel %r14d, %edi
+; SCALAR-NEXT: xorl %ebp, %edi
+; SCALAR-NEXT: xorl %ebx, %edi
+; SCALAR-NEXT: xorl %r11d, %edi
+; SCALAR-NEXT: xorl %r12d, %edi
+; SCALAR-NEXT: xorl %r10d, %eax
+; SCALAR-NEXT: xorl %ecx, %eax
+; SCALAR-NEXT: xorl %r8d, %eax
+; SCALAR-NEXT: xorl %esi, %eax
+; SCALAR-NEXT: xorl %edi, %eax
+; SCALAR-NEXT: shrl $8, %eax
+; SCALAR-NEXT: movb %al, (%rdx)
+; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; SCALAR-NEXT: movb %al, (%rcx)
+; SCALAR-NEXT: popq %rbx
+; SCALAR-NEXT: popq %r12
+; SCALAR-NEXT: popq %r13
+; SCALAR-NEXT: popq %r14
+; SCALAR-NEXT: popq %r15
+; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: commutative_clmulh_i8:
@@ -2355,40 +3382,92 @@ define void @commutative_clmulh_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
define void @commutative_clmulr_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; SCALAR-LABEL: commutative_clmulr_i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %edi, %r8d
-; SCALAR-NEXT: andl $2, %r8d
-; SCALAR-NEXT: movzbl %sil, %eax
-; SCALAR-NEXT: imull %eax, %r8d
-; SCALAR-NEXT: movl %edi, %esi
-; SCALAR-NEXT: andl $1, %esi
-; SCALAR-NEXT: imull %eax, %esi
-; SCALAR-NEXT: xorl %r8d, %esi
-; SCALAR-NEXT: movl %edi, %r8d
-; SCALAR-NEXT: andl $4, %r8d
-; SCALAR-NEXT: imull %eax, %r8d
-; SCALAR-NEXT: movl %edi, %r9d
-; SCALAR-NEXT: andl $8, %r9d
-; SCALAR-NEXT: imull %eax, %r9d
-; SCALAR-NEXT: xorl %r8d, %r9d
-; SCALAR-NEXT: xorl %esi, %r9d
-; SCALAR-NEXT: movl %edi, %esi
-; SCALAR-NEXT: andl $16, %esi
-; SCALAR-NEXT: imull %eax, %esi
-; SCALAR-NEXT: movl %edi, %r8d
-; SCALAR-NEXT: andl $32, %r8d
-; SCALAR-NEXT: imull %eax, %r8d
-; SCALAR-NEXT: xorl %esi, %r8d
-; SCALAR-NEXT: movl %edi, %esi
-; SCALAR-NEXT: andl $64, %esi
-; SCALAR-NEXT: imull %eax, %esi
-; SCALAR-NEXT: xorl %r8d, %esi
-; SCALAR-NEXT: xorl %r9d, %esi
+; SCALAR-NEXT: pushq %rbp
+; SCALAR-NEXT: pushq %r15
+; SCALAR-NEXT: pushq %r14
+; SCALAR-NEXT: pushq %r13
+; SCALAR-NEXT: pushq %r12
+; SCALAR-NEXT: pushq %rbx
+; SCALAR-NEXT: movzbl %sil, %r14d
+; SCALAR-NEXT: movl %r14d, %ebx
+; SCALAR-NEXT: shll $8, %ebx
+; SCALAR-NEXT: movl %r14d, %r11d
+; SCALAR-NEXT: shll $9, %r11d
+; SCALAR-NEXT: movl %r14d, %r10d
+; SCALAR-NEXT: shll $10, %r10d
+; SCALAR-NEXT: movl %r14d, %eax
+; SCALAR-NEXT: shll $11, %eax
+; SCALAR-NEXT: movl %r14d, %r9d
+; SCALAR-NEXT: shll $12, %r9d
+; SCALAR-NEXT: movl %esi, %r8d
+; SCALAR-NEXT: shll $13, %r8d
+; SCALAR-NEXT: xorl %ebp, %ebp
+; SCALAR-NEXT: testw %bp, %bp
+; SCALAR-NEXT: cmovel %ebp, %r8d
+; SCALAR-NEXT: cmovel %ebp, %r9d
+; SCALAR-NEXT: cmovel %ebp, %eax
+; SCALAR-NEXT: cmovel %ebp, %r10d
+; SCALAR-NEXT: cmovel %ebp, %r11d
+; SCALAR-NEXT: cmovel %ebp, %ebx
+; SCALAR-NEXT: shll $14, %esi
+; SCALAR-NEXT: testw %bp, %bp
+; SCALAR-NEXT: cmovel %ebp, %esi
+; SCALAR-NEXT: movl %edi, %ebp
+; SCALAR-NEXT: andl $1, %ebp
+; SCALAR-NEXT: cmovnel %r14d, %ebp
+; SCALAR-NEXT: leal (%r14,%r14), %r15d
+; SCALAR-NEXT: movl %edi, %r12d
+; SCALAR-NEXT: andl $2, %r12d
+; SCALAR-NEXT: cmovnel %r15d, %r12d
+; SCALAR-NEXT: xorl %ebp, %r12d
+; SCALAR-NEXT: leal (,%r14,4), %ebp
+; SCALAR-NEXT: movl %edi, %r15d
+; SCALAR-NEXT: andl $4, %r15d
+; SCALAR-NEXT: cmovnel %ebp, %r15d
+; SCALAR-NEXT: leal (,%r14,8), %r13d
+; SCALAR-NEXT: movl %edi, %ebp
+; SCALAR-NEXT: andl $8, %ebp
+; SCALAR-NEXT: cmovnel %r13d, %ebp
+; SCALAR-NEXT: xorl %r15d, %ebp
+; SCALAR-NEXT: xorl %r12d, %ebp
+; SCALAR-NEXT: movl %r14d, %r15d
+; SCALAR-NEXT: shll $4, %r15d
+; SCALAR-NEXT: movl %edi, %r12d
+; SCALAR-NEXT: andl $16, %r12d
+; SCALAR-NEXT: cmovnel %r15d, %r12d
+; SCALAR-NEXT: movl %r14d, %r15d
+; SCALAR-NEXT: shll $5, %r15d
+; SCALAR-NEXT: movl %edi, %r13d
+; SCALAR-NEXT: andl $32, %r13d
+; SCALAR-NEXT: cmovnel %r15d, %r13d
+; SCALAR-NEXT: xorl %r12d, %r13d
+; SCALAR-NEXT: movl %r14d, %r15d
+; SCALAR-NEXT: shll $6, %r15d
+; SCALAR-NEXT: movl %edi, %r12d
+; SCALAR-NEXT: andl $64, %r12d
+; SCALAR-NEXT: cmovnel %r15d, %r12d
+; SCALAR-NEXT: xorl %r13d, %r12d
+; SCALAR-NEXT: xorl %ebp, %r12d
+; SCALAR-NEXT: shll $7, %r14d
; SCALAR-NEXT: andl $128, %edi
-; SCALAR-NEXT: imull %eax, %edi
-; SCALAR-NEXT: xorl %esi, %edi
-; SCALAR-NEXT: shrl $7, %edi
-; SCALAR-NEXT: movb %dil, (%rdx)
-; SCALAR-NEXT: movb %dil, (%rcx)
+; SCALAR-NEXT: cmovnel %r14d, %edi
+; SCALAR-NEXT: xorl %ebx, %edi
+; SCALAR-NEXT: xorl %r11d, %edi
+; SCALAR-NEXT: xorl %r10d, %edi
+; SCALAR-NEXT: xorl %r12d, %edi
+; SCALAR-NEXT: xorl %r9d, %eax
+; SCALAR-NEXT: xorl %r8d, %eax
+; SCALAR-NEXT: xorl %esi, %eax
+; SCALAR-NEXT: xorl %edi, %eax
+; SCALAR-NEXT: shrl $7, %eax
+; SCALAR-NEXT: movb %al, (%rdx)
+; SCALAR-NEXT: movb %al, (%rcx)
+; SCALAR-NEXT: popq %rbx
+; SCALAR-NEXT: popq %r12
+; SCALAR-NEXT: popq %r13
+; SCALAR-NEXT: popq %r14
+; SCALAR-NEXT: popq %r15
+; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: commutative_clmulr_i8:
@@ -2436,53 +3515,50 @@ define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwi
; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: pushq %rax
; SCALAR-NEXT: movq %rcx, %rbx
-; SCALAR-NEXT: movl %esi, %ecx
-; SCALAR-NEXT: andb $1, %cl
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andb $2, %r8b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r8b
-; SCALAR-NEXT: movl %eax, %r8d
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %cl
-; SCALAR-NEXT: movl %eax, %ecx
-; SCALAR-NEXT: xorb %r8b, %cl
-; SCALAR-NEXT: movl %esi, %r8d
-; SCALAR-NEXT: andb $4, %r8b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r8b
-; SCALAR-NEXT: movl %eax, %r8d
-; SCALAR-NEXT: movl %esi, %r9d
-; SCALAR-NEXT: andb $8, %r9b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r9b
-; SCALAR-NEXT: movl %eax, %r9d
-; SCALAR-NEXT: movl %esi, %r10d
-; SCALAR-NEXT: andb $16, %r10b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r10b
-; SCALAR-NEXT: movl %eax, %r10d
-; SCALAR-NEXT: movl %esi, %r11d
-; SCALAR-NEXT: andb $32, %r11b
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %r11b
-; SCALAR-NEXT: movl %eax, %r11d
-; SCALAR-NEXT: movl %esi, %ebp
-; SCALAR-NEXT: andb $64, %bpl
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %bpl
-; SCALAR-NEXT: movl %eax, %ebp
-; SCALAR-NEXT: xorb %r8b, %r9b
-; SCALAR-NEXT: xorb %cl, %r9b
-; SCALAR-NEXT: xorb %r10b, %r11b
-; SCALAR-NEXT: xorb %r11b, %bpl
-; SCALAR-NEXT: xorb %r9b, %bpl
-; SCALAR-NEXT: andb $-128, %sil
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: mulb %sil
-; SCALAR-NEXT: xorb %bpl, %al
-; SCALAR-NEXT: movb %al, (%rdx)
-; SCALAR-NEXT: movzbl %al, %ebp
+; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi
+; SCALAR-NEXT: xorl %eax, %eax
+; SCALAR-NEXT: testb $1, %sil
+; SCALAR-NEXT: movl %edi, %ebp
+; SCALAR-NEXT: cmovel %eax, %ebp
+; SCALAR-NEXT: leal (%rdi,%rdi), %ecx
+; SCALAR-NEXT: movzbl %cl, %ecx
+; SCALAR-NEXT: testb $2, %sil
+; SCALAR-NEXT: cmovel %eax, %ecx
+; SCALAR-NEXT: xorl %ecx, %ebp
+; SCALAR-NEXT: leal (,%rdi,4), %ecx
+; SCALAR-NEXT: movzbl %cl, %ecx
+; SCALAR-NEXT: testb $4, %sil
+; SCALAR-NEXT: cmovel %eax, %ecx
+; SCALAR-NEXT: leal (,%rdi,8), %r8d
+; SCALAR-NEXT: movzbl %r8b, %r8d
+; SCALAR-NEXT: testb $8, %sil
+; SCALAR-NEXT: cmovel %eax, %r8d
+; SCALAR-NEXT: xorl %ecx, %r8d
+; SCALAR-NEXT: xorl %r8d, %ebp
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shlb $4, %cl
+; SCALAR-NEXT: movzbl %cl, %ecx
+; SCALAR-NEXT: testb $16, %sil
+; SCALAR-NEXT: cmovel %eax, %ecx
+; SCALAR-NEXT: movl %edi, %r8d
+; SCALAR-NEXT: shlb $5, %r8b
+; SCALAR-NEXT: movzbl %r8b, %r8d
+; SCALAR-NEXT: testb $32, %sil
+; SCALAR-NEXT: cmovel %eax, %r8d
+; SCALAR-NEXT: xorl %ecx, %r8d
+; SCALAR-NEXT: movl %edi, %ecx
+; SCALAR-NEXT: shlb $6, %cl
+; SCALAR-NEXT: movzbl %cl, %ecx
+; SCALAR-NEXT: testb $64, %sil
+; SCALAR-NEXT: cmovel %eax, %ecx
+; SCALAR-NEXT: xorl %r8d, %ecx
+; SCALAR-NEXT: xorl %ecx, %ebp
+; SCALAR-NEXT: shlb $7, %dil
+; SCALAR-NEXT: movzbl %dil, %ecx
+; SCALAR-NEXT: testb $-128, %sil
+; SCALAR-NEXT: cmovel %eax, %ecx
+; SCALAR-NEXT: xorl %ecx, %ebp
+; SCALAR-NEXT: movb %bpl, (%rdx)
; SCALAR-NEXT: movl %ebp, %edi
; SCALAR-NEXT: callq use@PLT
; SCALAR-NEXT: movb %bpl, (%rbx)
More information about the llvm-commits
mailing list