[llvm] db3f324 - [RISCV] Use vfirst.m to extract the first element from mask vector.
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 2 19:24:27 PST 2023
Author: jacquesguan
Date: 2023-01-03T11:24:18+08:00
New Revision: db3f3243bb85253eaf0e483452fdbfd5baf7ffd1
URL: https://github.com/llvm/llvm-project/commit/db3f3243bb85253eaf0e483452fdbfd5baf7ffd1
DIFF: https://github.com/llvm/llvm-project/commit/db3f3243bb85253eaf0e483452fdbfd5baf7ffd1.diff
LOG: [RISCV] Use vfirst.m to extract the first element from mask vector.
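This patch uses vfirst.m to extract the first bit of a mask vector: vfirst.m returns the index of the first set mask bit (or -1 if no bit is set), so element 0 is set exactly when the result equals 0.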
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D139512
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f89fc2bfe6953..55edf98b3de4e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5266,6 +5266,19 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
if (VecVT.getVectorElementType() == MVT::i1) {
+ // Use vfirst.m to extract the first bit.
+ if (isNullConstant(Idx)) {
+ MVT ContainerVT = VecVT;
+ if (VecVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VecVT);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ }
+ auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+ SDValue Vfirst =
+ DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
+ return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT),
+ ISD::SETEQ);
+ }
if (VecVT.isFixedLengthVector()) {
unsigned NumElts = VecVT.getVectorNumElements();
if (NumElts >= 8) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 7d4baaa141b5a..a20b0891783be 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -301,6 +301,9 @@ enum NodeType : unsigned {
// vcpop.m with additional mask and VL operands.
VCPOP_VL,
+ // vfirst.m with additional mask and VL operands.
+ VFIRST_VL,
+
// Reads value of CSR.
// The first operand is a chain pointer. The second specifies address of the
// required CSR. Two results are produced, the read value and the new chain
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index c6804b6011ebf..e54003e1576c5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -228,6 +228,13 @@ def riscv_vcpop_vl : SDNode<"RISCVISD::VCPOP_VL",
SDTCisSameNumEltsAs<1, 2>,
SDTCisVT<3, XLenVT>]>>;
+def riscv_vfirst_vl : SDNode<"RISCVISD::VFIRST_VL",
+ SDTypeProfile<1, 3, [SDTCisVT<0, XLenVT>,
+ SDTCisVec<1>, SDTCisInt<1>,
+ SDTCVecEltisVT<2, i1>,
+ SDTCisSameNumEltsAs<1, 2>,
+ SDTCisVT<3, XLenVT>]>>;
+
def SDT_RISCVVEXTEND_VL : SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameNumEltsAs<0, 1>,
SDTCisSameNumEltsAs<1, 2>,
@@ -1829,6 +1836,16 @@ foreach mti = AllMasks in {
VLOpFrag)),
(!cast<Instruction>("PseudoVCPOP_M_" # mti.BX # "_MASK")
VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
+
+ // 16.3 vfirst find-first-set mask bit
+ def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFIRST_M_" # mti.BX)
+ VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFIRST_M_" # mti.BX # "_MASK")
+ VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
}
} // Predicates = [HasVInstructions]
diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
index df1314070ce72..8089d0adedc94 100644
--- a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
@@ -27,15 +27,10 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan
; RV32-NEXT: vmsne.vi v0, v11, 0
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
-; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT: vmv1r.v v0, v10
-; RV32-NEXT: vmerge.vim v8, v9, 1, v0
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: andi a1, a1, 1
+; RV32-NEXT: vfirst.m a1, v10
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv.v.x v8, a1
; RV32-NEXT: vmsne.vi v0, v8, 0
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -54,15 +49,10 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan
; RV64-NEXT: vmsne.vi v0, v13, 0
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
-; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vmv1r.v v0, v12
-; RV64-NEXT: vmerge.vim v8, v10, 1, v0
-; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: andi a1, a1, 1
+; RV64-NEXT: vfirst.m a1, v12
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT: vmv.v.x v8, a1
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index dc3bc4b27abeb..81fdef107d4db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -219,10 +219,9 @@ define i1 @extractelt_nxv1i1_idx0(<vscale x 1 x i8>* %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmseq.vi v8, v8, 0
+; CHECK-NEXT: vfirst.m a0, v8
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
%b = icmp eq <vscale x 1 x i8> %a, zeroinitializer
@@ -235,10 +234,9 @@ define i1 @extractelt_nxv2i1_idx0(<vscale x 2 x i8>* %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmseq.vi v8, v8, 0
+; CHECK-NEXT: vfirst.m a0, v8
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <vscale x 2 x i8>, <vscale x 2 x i8>* %x
%b = icmp eq <vscale x 2 x i8> %a, zeroinitializer
@@ -251,10 +249,9 @@ define i1 @extractelt_nxv4i1_idx0(<vscale x 4 x i8>* %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmseq.vi v8, v8, 0
+; CHECK-NEXT: vfirst.m a0, v8
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <vscale x 4 x i8>, <vscale x 4 x i8>* %x
%b = icmp eq <vscale x 4 x i8> %a, zeroinitializer
@@ -267,10 +264,9 @@ define i1 @extractelt_nxv8i1_idx0(<vscale x 8 x i8>* %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vl1r.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmseq.vi v8, v8, 0
+; CHECK-NEXT: vfirst.m a0, v8
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <vscale x 8 x i8>, <vscale x 8 x i8>* %x
%b = icmp eq <vscale x 8 x i8> %a, zeroinitializer
@@ -283,10 +279,9 @@ define i1 @extractelt_nxv16i1_idx0(<vscale x 16 x i8>* %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vl2r.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmseq.vi v10, v8, 0
+; CHECK-NEXT: vfirst.m a0, v10
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <vscale x 16 x i8>, <vscale x 16 x i8>* %x
%b = icmp eq <vscale x 16 x i8> %a, zeroinitializer
@@ -299,10 +294,9 @@ define i1 @extractelt_nxv32i1_idx0(<vscale x 32 x i8>* %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vl4r.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmseq.vi v12, v8, 0
+; CHECK-NEXT: vfirst.m a0, v12
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <vscale x 32 x i8>, <vscale x 32 x i8>* %x
%b = icmp eq <vscale x 32 x i8> %a, zeroinitializer
@@ -315,10 +309,9 @@ define i1 @extractelt_nxv64i1_idx0(<vscale x 64 x i8>* %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vl8r.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmseq.vi v16, v8, 0
+; CHECK-NEXT: vfirst.m a0, v16
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <vscale x 64 x i8>, <vscale x 64 x i8>* %x
%b = icmp eq <vscale x 64 x i8> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
index 3bd69a1388a8f..2d609adaf0e4e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
@@ -17,10 +17,11 @@ define <2 x i1> @reverse_v2i1(<2 x i1> %a) {
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vmv.x.s a0, v9
-; CHECK-NEXT: vmv.x.s a1, v8
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vfirst.m a1, v0
+; CHECK-NEXT: seqz a1, a1
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
@@ -40,9 +41,7 @@ define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: addi a0, sp, 15
; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: addi a0, sp, 14
; CHECK-NEXT: vse8.v v9, (a0)
@@ -53,6 +52,9 @@ define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
; CHECK-NEXT: addi a0, sp, 12
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vfirst.m a1, v0
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: sb a1, 15(sp)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -67,10 +69,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV32-BITS-UNKNOWN: # %bb.0:
; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16
; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16
-; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT: sb a0, 15(sp)
; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 15(sp)
; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
@@ -93,7 +96,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
; RV32-BITS-UNKNOWN-NEXT: sb a0, 8(sp)
; RV32-BITS-UNKNOWN-NEXT: addi a0, sp, 8
-; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a0)
; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
@@ -104,10 +106,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV32-BITS-256: # %bb.0:
; RV32-BITS-256-NEXT: addi sp, sp, -16
; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16
-; RV32-BITS-256-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV32-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-256-NEXT: vfirst.m a0, v0
+; RV32-BITS-256-NEXT: seqz a0, a0
+; RV32-BITS-256-NEXT: sb a0, 15(sp)
; RV32-BITS-256-NEXT: vmv.x.s a0, v0
-; RV32-BITS-256-NEXT: andi a1, a0, 1
-; RV32-BITS-256-NEXT: sb a1, 15(sp)
; RV32-BITS-256-NEXT: slli a1, a0, 30
; RV32-BITS-256-NEXT: srli a1, a1, 31
; RV32-BITS-256-NEXT: sb a1, 14(sp)
@@ -130,7 +133,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV32-BITS-256-NEXT: srli a0, a0, 31
; RV32-BITS-256-NEXT: sb a0, 8(sp)
; RV32-BITS-256-NEXT: addi a0, sp, 8
-; RV32-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-BITS-256-NEXT: vle8.v v8, (a0)
; RV32-BITS-256-NEXT: vand.vi v8, v8, 1
; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
@@ -141,10 +143,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV32-BITS-512: # %bb.0:
; RV32-BITS-512-NEXT: addi sp, sp, -16
; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16
-; RV32-BITS-512-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV32-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-512-NEXT: vfirst.m a0, v0
+; RV32-BITS-512-NEXT: seqz a0, a0
+; RV32-BITS-512-NEXT: sb a0, 15(sp)
; RV32-BITS-512-NEXT: vmv.x.s a0, v0
-; RV32-BITS-512-NEXT: andi a1, a0, 1
-; RV32-BITS-512-NEXT: sb a1, 15(sp)
; RV32-BITS-512-NEXT: slli a1, a0, 30
; RV32-BITS-512-NEXT: srli a1, a1, 31
; RV32-BITS-512-NEXT: sb a1, 14(sp)
@@ -167,7 +170,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV32-BITS-512-NEXT: srli a0, a0, 31
; RV32-BITS-512-NEXT: sb a0, 8(sp)
; RV32-BITS-512-NEXT: addi a0, sp, 8
-; RV32-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-BITS-512-NEXT: vle8.v v8, (a0)
; RV32-BITS-512-NEXT: vand.vi v8, v8, 1
; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
@@ -178,10 +180,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -16
; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16
-; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT: sb a0, 15(sp)
; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT: andi a1, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 15(sp)
; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
@@ -204,7 +207,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63
; RV64-BITS-UNKNOWN-NEXT: sb a0, 8(sp)
; RV64-BITS-UNKNOWN-NEXT: addi a0, sp, 8
-; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a0)
; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
@@ -215,10 +217,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: addi sp, sp, -16
; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16
-; RV64-BITS-256-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV64-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-256-NEXT: vfirst.m a0, v0
+; RV64-BITS-256-NEXT: seqz a0, a0
+; RV64-BITS-256-NEXT: sb a0, 15(sp)
; RV64-BITS-256-NEXT: vmv.x.s a0, v0
-; RV64-BITS-256-NEXT: andi a1, a0, 1
-; RV64-BITS-256-NEXT: sb a1, 15(sp)
; RV64-BITS-256-NEXT: slli a1, a0, 62
; RV64-BITS-256-NEXT: srli a1, a1, 63
; RV64-BITS-256-NEXT: sb a1, 14(sp)
@@ -241,7 +244,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV64-BITS-256-NEXT: srli a0, a0, 63
; RV64-BITS-256-NEXT: sb a0, 8(sp)
; RV64-BITS-256-NEXT: addi a0, sp, 8
-; RV64-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-BITS-256-NEXT: vle8.v v8, (a0)
; RV64-BITS-256-NEXT: vand.vi v8, v8, 1
; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
@@ -252,10 +254,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: addi sp, sp, -16
; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16
-; RV64-BITS-512-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV64-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-512-NEXT: vfirst.m a0, v0
+; RV64-BITS-512-NEXT: seqz a0, a0
+; RV64-BITS-512-NEXT: sb a0, 15(sp)
; RV64-BITS-512-NEXT: vmv.x.s a0, v0
-; RV64-BITS-512-NEXT: andi a1, a0, 1
-; RV64-BITS-512-NEXT: sb a1, 15(sp)
; RV64-BITS-512-NEXT: slli a1, a0, 62
; RV64-BITS-512-NEXT: srli a1, a1, 63
; RV64-BITS-512-NEXT: sb a1, 14(sp)
@@ -278,7 +281,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV64-BITS-512-NEXT: srli a0, a0, 63
; RV64-BITS-512-NEXT: sb a0, 8(sp)
; RV64-BITS-512-NEXT: addi a0, sp, 8
-; RV64-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-BITS-512-NEXT: vle8.v v8, (a0)
; RV64-BITS-512-NEXT: vand.vi v8, v8, 1
; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
@@ -293,10 +295,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
; RV32-BITS-UNKNOWN: # %bb.0:
; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16
; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16
+; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT: sb a0, 15(sp)
; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 15(sp)
; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
@@ -354,10 +358,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
; RV32-BITS-256: # %bb.0:
; RV32-BITS-256-NEXT: addi sp, sp, -16
; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16
+; RV32-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-256-NEXT: vfirst.m a0, v0
+; RV32-BITS-256-NEXT: seqz a0, a0
+; RV32-BITS-256-NEXT: sb a0, 15(sp)
; RV32-BITS-256-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV32-BITS-256-NEXT: vmv.x.s a0, v0
-; RV32-BITS-256-NEXT: andi a1, a0, 1
-; RV32-BITS-256-NEXT: sb a1, 15(sp)
; RV32-BITS-256-NEXT: slli a1, a0, 30
; RV32-BITS-256-NEXT: srli a1, a1, 31
; RV32-BITS-256-NEXT: sb a1, 14(sp)
@@ -415,10 +421,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
; RV32-BITS-512: # %bb.0:
; RV32-BITS-512-NEXT: addi sp, sp, -16
; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16
+; RV32-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-512-NEXT: vfirst.m a0, v0
+; RV32-BITS-512-NEXT: seqz a0, a0
+; RV32-BITS-512-NEXT: sb a0, 15(sp)
; RV32-BITS-512-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV32-BITS-512-NEXT: vmv.x.s a0, v0
-; RV32-BITS-512-NEXT: andi a1, a0, 1
-; RV32-BITS-512-NEXT: sb a1, 15(sp)
; RV32-BITS-512-NEXT: slli a1, a0, 30
; RV32-BITS-512-NEXT: srli a1, a1, 31
; RV32-BITS-512-NEXT: sb a1, 14(sp)
@@ -476,10 +484,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -16
; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16
+; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT: sb a0, 15(sp)
; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT: andi a1, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 15(sp)
; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
@@ -537,10 +547,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: addi sp, sp, -16
; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16
+; RV64-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-256-NEXT: vfirst.m a0, v0
+; RV64-BITS-256-NEXT: seqz a0, a0
+; RV64-BITS-256-NEXT: sb a0, 15(sp)
; RV64-BITS-256-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV64-BITS-256-NEXT: vmv.x.s a0, v0
-; RV64-BITS-256-NEXT: andi a1, a0, 1
-; RV64-BITS-256-NEXT: sb a1, 15(sp)
; RV64-BITS-256-NEXT: slli a1, a0, 62
; RV64-BITS-256-NEXT: srli a1, a1, 63
; RV64-BITS-256-NEXT: sb a1, 14(sp)
@@ -598,10 +610,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: addi sp, sp, -16
; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16
+; RV64-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-512-NEXT: vfirst.m a0, v0
+; RV64-BITS-512-NEXT: seqz a0, a0
+; RV64-BITS-512-NEXT: sb a0, 15(sp)
; RV64-BITS-512-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV64-BITS-512-NEXT: vmv.x.s a0, v0
-; RV64-BITS-512-NEXT: andi a1, a0, 1
-; RV64-BITS-512-NEXT: sb a1, 15(sp)
; RV64-BITS-512-NEXT: slli a1, a0, 62
; RV64-BITS-512-NEXT: srli a1, a1, 63
; RV64-BITS-512-NEXT: sb a1, 14(sp)
@@ -670,103 +684,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 64
; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -32
-; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1
+; RV32-BITS-UNKNOWN-NEXT: li a0, 32
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vfirst.m a1, v0
+; RV32-BITS-UNKNOWN-NEXT: seqz a1, a1
; RV32-BITS-UNKNOWN-NEXT: sb a1, 31(sp)
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a0, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 0(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 30(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 29(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 28(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 27(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 26(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 25(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 24(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 23(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 22(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 21(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 20(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 19(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 18(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 17(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 16
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 16(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 15
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 15(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 14
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 13
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 12
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 12(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 11
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 10
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 9
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 8
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 8(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 7
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 7(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 6
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 6(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 5
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 5(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 4
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 4(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 3(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2
+; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 0(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 30(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 29(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 28(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 27(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 26(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 25(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 24(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 23(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 22(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 21(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 20(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 19(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 18(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 17(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 16
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 16(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 15(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 14(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 13
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 13(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 12(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 11(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 10(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 9(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 8(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 7(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 6(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 5(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 4(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 3(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 2(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 1
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 2(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1
-; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a0, 1(sp)
-; RV32-BITS-UNKNOWN-NEXT: li a0, 32
+; RV32-BITS-UNKNOWN-NEXT: sb a1, 1(sp)
; RV32-BITS-UNKNOWN-NEXT: mv a1, sp
; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a1)
@@ -789,103 +805,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
; RV32-BITS-256-NEXT: addi s0, sp, 64
; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0
; RV32-BITS-256-NEXT: andi sp, sp, -32
-; RV32-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-256-NEXT: vmv.x.s a0, v0
-; RV32-BITS-256-NEXT: andi a1, a0, 1
+; RV32-BITS-256-NEXT: li a0, 32
+; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; RV32-BITS-256-NEXT: vfirst.m a1, v0
+; RV32-BITS-256-NEXT: seqz a1, a1
; RV32-BITS-256-NEXT: sb a1, 31(sp)
-; RV32-BITS-256-NEXT: srli a1, a0, 31
-; RV32-BITS-256-NEXT: sb a1, 0(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 30
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 30(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 29
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 29(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 28
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 28(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 27
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 27(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 26
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 26(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 25
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 25(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 24
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 24(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 23
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 23(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 22
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 22(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 21
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 21(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 20
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 20(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 19
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 19(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 18
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 18(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 17
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 17(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 16
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 16(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 15
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 15(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 14
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 14(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 13
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 13(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 12
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 12(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 11
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 11(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 10
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 10(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 9
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 9(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 8
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 8(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 7
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 7(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 6
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 6(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 5
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 5(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 4
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 4(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 3
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 3(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 2
+; RV32-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-256-NEXT: vmv.x.s a1, v0
+; RV32-BITS-256-NEXT: srli a2, a1, 31
+; RV32-BITS-256-NEXT: sb a2, 0(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 30
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 30(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 29
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 29(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 28
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 28(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 27
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 27(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 26
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 26(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 25
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 25(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 24
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 24(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 23
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 23(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 22
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 22(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 21
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 21(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 20
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 20(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 19
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 19(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 18
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 18(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 17
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 17(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 16
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 16(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 15
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 15(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 14
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 14(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 13
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 13(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 12
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 12(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 11
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 11(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 10
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 10(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 9
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 9(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 8
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 8(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 7
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 7(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 6
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 6(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 5
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 5(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 4
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 4(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 3
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 3(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 2
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 2(sp)
+; RV32-BITS-256-NEXT: slli a1, a1, 1
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 2(sp)
-; RV32-BITS-256-NEXT: slli a0, a0, 1
-; RV32-BITS-256-NEXT: srli a0, a0, 31
-; RV32-BITS-256-NEXT: sb a0, 1(sp)
-; RV32-BITS-256-NEXT: li a0, 32
+; RV32-BITS-256-NEXT: sb a1, 1(sp)
; RV32-BITS-256-NEXT: mv a1, sp
; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV32-BITS-256-NEXT: vle8.v v8, (a1)
@@ -908,103 +926,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
; RV32-BITS-512-NEXT: addi s0, sp, 64
; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0
; RV32-BITS-512-NEXT: andi sp, sp, -32
-; RV32-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-512-NEXT: vmv.x.s a0, v0
-; RV32-BITS-512-NEXT: andi a1, a0, 1
+; RV32-BITS-512-NEXT: li a0, 32
+; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; RV32-BITS-512-NEXT: vfirst.m a1, v0
+; RV32-BITS-512-NEXT: seqz a1, a1
; RV32-BITS-512-NEXT: sb a1, 31(sp)
-; RV32-BITS-512-NEXT: srli a1, a0, 31
-; RV32-BITS-512-NEXT: sb a1, 0(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 30
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 30(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 29
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 29(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 28
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 28(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 27
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 27(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 26
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 26(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 25
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 25(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 24
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 24(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 23
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 23(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 22
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 22(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 21
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 21(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 20
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 20(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 19
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 19(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 18
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 18(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 17
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 17(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 16
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 16(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 15
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 15(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 14
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 14(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 13
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 13(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 12
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 12(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 11
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 11(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 10
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 10(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 9
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 9(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 8
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 8(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 7
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 7(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 6
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 6(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 5
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 5(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 4
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 4(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 3
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 3(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 2
+; RV32-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-512-NEXT: vmv.x.s a1, v0
+; RV32-BITS-512-NEXT: srli a2, a1, 31
+; RV32-BITS-512-NEXT: sb a2, 0(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 30
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 30(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 29
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 29(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 28
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 28(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 27
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 27(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 26
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 26(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 25
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 25(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 24
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 24(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 23
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 23(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 22
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 22(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 21
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 21(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 20
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 20(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 19
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 19(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 18
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 18(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 17
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 17(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 16
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 16(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 15
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 15(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 14
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 14(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 13
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 13(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 12
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 12(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 11
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 11(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 10
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 10(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 9
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 9(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 8
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 8(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 7
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 7(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 6
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 6(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 5
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 5(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 4
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 4(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 3
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 3(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 2
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 2(sp)
+; RV32-BITS-512-NEXT: slli a1, a1, 1
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 2(sp)
-; RV32-BITS-512-NEXT: slli a0, a0, 1
-; RV32-BITS-512-NEXT: srli a0, a0, 31
-; RV32-BITS-512-NEXT: sb a0, 1(sp)
-; RV32-BITS-512-NEXT: li a0, 32
+; RV32-BITS-512-NEXT: sb a1, 1(sp)
; RV32-BITS-512-NEXT: mv a1, sp
; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV32-BITS-512-NEXT: vle8.v v8, (a1)
@@ -1027,103 +1047,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 64
; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -32
-; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT: andi a1, a0, 1
+; RV64-BITS-UNKNOWN-NEXT: li a0, 32
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vfirst.m a1, v0
+; RV64-BITS-UNKNOWN-NEXT: seqz a1, a1
; RV64-BITS-UNKNOWN-NEXT: sb a1, 31(sp)
-; RV64-BITS-UNKNOWN-NEXT: srliw a1, a0, 31
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 0(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 30(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 29(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 28(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 27(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 26(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 25(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 24(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 23(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 22(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 21(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 20(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 19(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 18(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 17(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 48
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 16(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 47
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 15(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 46
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 45
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 44
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 12(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 43
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 42
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 41
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 40
+; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0
+; RV64-BITS-UNKNOWN-NEXT: srliw a2, a1, 31
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 0(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 62
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 30(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 61
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 29(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 60
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 28(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 59
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 27(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 58
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 26(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 57
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 25(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 56
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 24(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 55
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 23(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 54
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 22(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 53
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 21(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 52
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 20(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 51
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 19(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 50
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 18(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 49
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 17(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 48
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 16(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 47
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 15(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 46
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 14(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 45
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 13(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 44
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 12(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 43
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 11(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 42
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 10(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 41
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 9(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 40
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 8(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 39
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 7(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 38
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 6(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 37
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 5(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 36
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 4(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 35
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 3(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 34
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 2(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a1, 33
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 8(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 39
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 7(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 38
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 6(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 37
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 5(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 36
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 4(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 35
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 3(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 34
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 2(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 33
-; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a0, 1(sp)
-; RV64-BITS-UNKNOWN-NEXT: li a0, 32
+; RV64-BITS-UNKNOWN-NEXT: sb a1, 1(sp)
; RV64-BITS-UNKNOWN-NEXT: mv a1, sp
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a1)
@@ -1146,103 +1168,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
; RV64-BITS-256-NEXT: addi s0, sp, 64
; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0
; RV64-BITS-256-NEXT: andi sp, sp, -32
-; RV64-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-256-NEXT: vmv.x.s a0, v0
-; RV64-BITS-256-NEXT: andi a1, a0, 1
+; RV64-BITS-256-NEXT: li a0, 32
+; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; RV64-BITS-256-NEXT: vfirst.m a1, v0
+; RV64-BITS-256-NEXT: seqz a1, a1
; RV64-BITS-256-NEXT: sb a1, 31(sp)
-; RV64-BITS-256-NEXT: srliw a1, a0, 31
-; RV64-BITS-256-NEXT: sb a1, 0(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 62
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 30(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 61
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 29(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 60
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 28(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 59
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 27(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 58
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 26(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 57
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 25(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 56
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 24(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 55
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 23(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 54
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 22(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 53
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 21(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 52
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 20(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 51
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 19(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 50
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 18(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 49
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 17(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 48
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 16(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 47
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 15(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 46
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 14(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 45
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 13(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 44
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 12(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 43
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 11(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 42
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 10(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 41
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 9(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 40
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 8(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 39
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 7(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 38
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 6(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 37
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 5(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 36
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 4(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 35
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 3(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 34
+; RV64-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV64-BITS-256-NEXT: vmv.x.s a1, v0
+; RV64-BITS-256-NEXT: srliw a2, a1, 31
+; RV64-BITS-256-NEXT: sb a2, 0(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 62
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 30(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 61
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 29(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 60
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 28(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 59
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 27(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 58
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 26(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 57
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 25(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 56
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 24(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 55
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 23(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 54
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 22(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 53
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 21(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 52
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 20(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 51
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 19(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 50
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 18(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 49
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 17(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 48
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 16(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 47
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 15(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 46
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 14(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 45
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 13(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 44
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 12(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 43
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 11(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 42
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 10(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 41
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 9(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 40
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 8(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 39
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 7(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 38
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 6(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 37
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 5(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 36
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 4(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 35
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 3(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 34
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 2(sp)
+; RV64-BITS-256-NEXT: slli a1, a1, 33
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 2(sp)
-; RV64-BITS-256-NEXT: slli a0, a0, 33
-; RV64-BITS-256-NEXT: srli a0, a0, 63
-; RV64-BITS-256-NEXT: sb a0, 1(sp)
-; RV64-BITS-256-NEXT: li a0, 32
+; RV64-BITS-256-NEXT: sb a1, 1(sp)
; RV64-BITS-256-NEXT: mv a1, sp
; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV64-BITS-256-NEXT: vle8.v v8, (a1)
@@ -1265,103 +1289,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
; RV64-BITS-512-NEXT: addi s0, sp, 64
; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0
; RV64-BITS-512-NEXT: andi sp, sp, -32
-; RV64-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-512-NEXT: vmv.x.s a0, v0
-; RV64-BITS-512-NEXT: andi a1, a0, 1
+; RV64-BITS-512-NEXT: li a0, 32
+; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; RV64-BITS-512-NEXT: vfirst.m a1, v0
+; RV64-BITS-512-NEXT: seqz a1, a1
; RV64-BITS-512-NEXT: sb a1, 31(sp)
-; RV64-BITS-512-NEXT: srliw a1, a0, 31
-; RV64-BITS-512-NEXT: sb a1, 0(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 62
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 30(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 61
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 29(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 60
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 28(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 59
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 27(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 58
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 26(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 57
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 25(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 56
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 24(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 55
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 23(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 54
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 22(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 53
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 21(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 52
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 20(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 51
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 19(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 50
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 18(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 49
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 17(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 48
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 16(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 47
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 15(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 46
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 14(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 45
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 13(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 44
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 12(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 43
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 11(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 42
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 10(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 41
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 9(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 40
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 8(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 39
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 7(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 38
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 6(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 37
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 5(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 36
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 4(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 35
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 3(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 34
+; RV64-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV64-BITS-512-NEXT: vmv.x.s a1, v0
+; RV64-BITS-512-NEXT: srliw a2, a1, 31
+; RV64-BITS-512-NEXT: sb a2, 0(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 62
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 30(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 61
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 29(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 60
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 28(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 59
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 27(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 58
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 26(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 57
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 25(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 56
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 24(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 55
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 23(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 54
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 22(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 53
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 21(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 52
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 20(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 51
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 19(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 50
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 18(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 49
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 17(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 48
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 16(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 47
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 15(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 46
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 14(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 45
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 13(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 44
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 12(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 43
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 11(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 42
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 10(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 41
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 9(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 40
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 8(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 39
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 7(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 38
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 6(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 37
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 5(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 36
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 4(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 35
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 3(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 34
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 2(sp)
+; RV64-BITS-512-NEXT: slli a1, a1, 33
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 2(sp)
-; RV64-BITS-512-NEXT: slli a0, a0, 33
-; RV64-BITS-512-NEXT: srli a0, a0, 63
-; RV64-BITS-512-NEXT: sb a0, 1(sp)
-; RV64-BITS-512-NEXT: li a0, 32
+; RV64-BITS-512-NEXT: sb a1, 1(sp)
; RV64-BITS-512-NEXT: mv a1, sp
; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV64-BITS-512-NEXT: vle8.v v8, (a1)
@@ -1388,199 +1414,201 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 128
; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -64
-; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1
+; RV32-BITS-UNKNOWN-NEXT: li a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vfirst.m a1, v0
+; RV32-BITS-UNKNOWN-NEXT: seqz a1, a1
; RV32-BITS-UNKNOWN-NEXT: sb a1, 63(sp)
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a0, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 32(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 62(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 61(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 60(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 59(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 58(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 57(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 56(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 55(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 54(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 53(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 52(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 51(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 50(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 49(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 16
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 48(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 15
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 47(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 14
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 46(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 13
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 45(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 12
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 44(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 11
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 43(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 10
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 42(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 9
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 41(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 8
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 40(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 7
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 39(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 6
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 38(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 5
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 37(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 4
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 36(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 35(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 34(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1
-; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a0, 33(sp)
+; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 32(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 62(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 61(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 60(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 59(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 58(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 57(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 56(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 55(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 54(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 53(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 52(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 51(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 50(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 49(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 16
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 48(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 47(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 46(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 13
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 45(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 44(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 43(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 42(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 41(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 40(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 39(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 38(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 37(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 36(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 35(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 34(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 1
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a1, 33(sp)
; RV32-BITS-UNKNOWN-NEXT: vslidedown.vi v8, v0, 1
-; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v8
-; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 31(sp)
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a0, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 0(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 30(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 29(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 28(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 27(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 26(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 25(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 24(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 23(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 22(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 21(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 20(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 19(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 18(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 17(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 16
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 16(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 15
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 15(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 14
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 13
+; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v8
+; RV32-BITS-UNKNOWN-NEXT: andi a2, a1, 1
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 31(sp)
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 0(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 30(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 29(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 28(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 27(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 26(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 25(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 24(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 23(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 22(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 21(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 20(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 19(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 18(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 17(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 16
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 16(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 15(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 14(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 13
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 13(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 12(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 11(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 10(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 9(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 8(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 7(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 6(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 5(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 4(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 3(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT: sb a2, 2(sp)
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 1
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 12
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 12(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 11
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 10
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 9
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 8
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 8(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 7
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 7(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 6
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 6(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 5
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 5(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 4
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 4(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 3(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 2(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1
-; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a0, 1(sp)
-; RV32-BITS-UNKNOWN-NEXT: li a0, 64
+; RV32-BITS-UNKNOWN-NEXT: sb a1, 1(sp)
; RV32-BITS-UNKNOWN-NEXT: mv a1, sp
; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a1)
@@ -1603,199 +1631,201 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; RV32-BITS-256-NEXT: addi s0, sp, 128
; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0
; RV32-BITS-256-NEXT: andi sp, sp, -64
-; RV32-BITS-256-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-256-NEXT: vmv.x.s a0, v0
-; RV32-BITS-256-NEXT: andi a1, a0, 1
+; RV32-BITS-256-NEXT: li a0, 64
+; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-256-NEXT: vfirst.m a1, v0
+; RV32-BITS-256-NEXT: seqz a1, a1
; RV32-BITS-256-NEXT: sb a1, 63(sp)
-; RV32-BITS-256-NEXT: srli a1, a0, 31
-; RV32-BITS-256-NEXT: sb a1, 32(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 30
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 62(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 29
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 61(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 28
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 60(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 27
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 59(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 26
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 58(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 25
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 57(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 24
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 56(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 23
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 55(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 22
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 54(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 21
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 53(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 20
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 52(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 19
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 51(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 18
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 50(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 17
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 49(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 16
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 48(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 15
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 47(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 14
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 46(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 13
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 45(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 12
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 44(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 11
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 43(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 10
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 42(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 9
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 41(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 8
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 40(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 7
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 39(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 6
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 38(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 5
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 37(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 4
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 36(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 3
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 35(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 2
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 34(sp)
-; RV32-BITS-256-NEXT: slli a0, a0, 1
-; RV32-BITS-256-NEXT: srli a0, a0, 31
-; RV32-BITS-256-NEXT: sb a0, 33(sp)
+; RV32-BITS-256-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-BITS-256-NEXT: vmv.x.s a1, v0
+; RV32-BITS-256-NEXT: srli a2, a1, 31
+; RV32-BITS-256-NEXT: sb a2, 32(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 30
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 62(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 29
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 61(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 28
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 60(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 27
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 59(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 26
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 58(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 25
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 57(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 24
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 56(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 23
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 55(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 22
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 54(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 21
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 53(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 20
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 52(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 19
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 51(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 18
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 50(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 17
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 49(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 16
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 48(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 15
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 47(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 14
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 46(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 13
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 45(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 12
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 44(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 11
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 43(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 10
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 42(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 9
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 41(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 8
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 40(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 7
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 39(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 6
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 38(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 5
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 37(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 4
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 36(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 3
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 35(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 2
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 34(sp)
+; RV32-BITS-256-NEXT: slli a1, a1, 1
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: sb a1, 33(sp)
; RV32-BITS-256-NEXT: vslidedown.vi v8, v0, 1
-; RV32-BITS-256-NEXT: vmv.x.s a0, v8
-; RV32-BITS-256-NEXT: andi a1, a0, 1
-; RV32-BITS-256-NEXT: sb a1, 31(sp)
-; RV32-BITS-256-NEXT: srli a1, a0, 31
-; RV32-BITS-256-NEXT: sb a1, 0(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 30
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 30(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 29
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 29(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 28
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 28(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 27
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 27(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 26
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 26(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 25
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 25(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 24
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 24(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 23
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 23(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 22
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 22(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 21
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 21(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 20
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 20(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 19
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 19(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 18
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 18(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 17
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 17(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 16
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 16(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 15
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 15(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 14
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 14(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 13
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 13(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 12
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 12(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 11
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 11(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 10
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 10(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 9
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 9(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 8
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 8(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 7
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 7(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 6
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 6(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 5
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 5(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 4
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 4(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 3
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 3(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 2
+; RV32-BITS-256-NEXT: vmv.x.s a1, v8
+; RV32-BITS-256-NEXT: andi a2, a1, 1
+; RV32-BITS-256-NEXT: sb a2, 31(sp)
+; RV32-BITS-256-NEXT: srli a2, a1, 31
+; RV32-BITS-256-NEXT: sb a2, 0(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 30
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 30(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 29
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 29(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 28
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 28(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 27
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 27(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 26
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 26(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 25
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 25(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 24
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 24(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 23
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 23(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 22
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 22(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 21
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 21(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 20
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 20(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 19
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 19(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 18
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 18(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 17
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 17(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 16
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 16(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 15
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 15(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 14
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 14(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 13
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 13(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 12
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 12(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 11
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 11(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 10
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 10(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 9
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 9(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 8
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 8(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 7
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 7(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 6
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 6(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 5
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 5(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 4
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 4(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 3
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 3(sp)
+; RV32-BITS-256-NEXT: slli a2, a1, 2
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: sb a2, 2(sp)
+; RV32-BITS-256-NEXT: slli a1, a1, 1
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 2(sp)
-; RV32-BITS-256-NEXT: slli a0, a0, 1
-; RV32-BITS-256-NEXT: srli a0, a0, 31
-; RV32-BITS-256-NEXT: sb a0, 1(sp)
-; RV32-BITS-256-NEXT: li a0, 64
+; RV32-BITS-256-NEXT: sb a1, 1(sp)
; RV32-BITS-256-NEXT: mv a1, sp
; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-BITS-256-NEXT: vle8.v v8, (a1)
@@ -1818,199 +1848,201 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; RV32-BITS-512-NEXT: addi s0, sp, 128
; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0
; RV32-BITS-512-NEXT: andi sp, sp, -64
-; RV32-BITS-512-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-512-NEXT: vmv.x.s a0, v0
-; RV32-BITS-512-NEXT: andi a1, a0, 1
+; RV32-BITS-512-NEXT: li a0, 64
+; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-512-NEXT: vfirst.m a1, v0
+; RV32-BITS-512-NEXT: seqz a1, a1
; RV32-BITS-512-NEXT: sb a1, 63(sp)
-; RV32-BITS-512-NEXT: srli a1, a0, 31
-; RV32-BITS-512-NEXT: sb a1, 32(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 30
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 62(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 29
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 61(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 28
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 60(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 27
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 59(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 26
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 58(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 25
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 57(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 24
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 56(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 23
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 55(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 22
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 54(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 21
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 53(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 20
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 52(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 19
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 51(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 18
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 50(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 17
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 49(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 16
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 48(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 15
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 47(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 14
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 46(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 13
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 45(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 12
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 44(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 11
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 43(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 10
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 42(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 9
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 41(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 8
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 40(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 7
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 39(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 6
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 38(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 5
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 37(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 4
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 36(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 3
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 35(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 2
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 34(sp)
-; RV32-BITS-512-NEXT: slli a0, a0, 1
-; RV32-BITS-512-NEXT: srli a0, a0, 31
-; RV32-BITS-512-NEXT: sb a0, 33(sp)
+; RV32-BITS-512-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-BITS-512-NEXT: vmv.x.s a1, v0
+; RV32-BITS-512-NEXT: srli a2, a1, 31
+; RV32-BITS-512-NEXT: sb a2, 32(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 30
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 62(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 29
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 61(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 28
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 60(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 27
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 59(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 26
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 58(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 25
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 57(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 24
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 56(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 23
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 55(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 22
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 54(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 21
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 53(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 20
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 52(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 19
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 51(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 18
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 50(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 17
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 49(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 16
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 48(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 15
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 47(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 14
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 46(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 13
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 45(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 12
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 44(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 11
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 43(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 10
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 42(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 9
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 41(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 8
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 40(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 7
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 39(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 6
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 38(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 5
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 37(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 4
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 36(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 3
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 35(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 2
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 34(sp)
+; RV32-BITS-512-NEXT: slli a1, a1, 1
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: sb a1, 33(sp)
; RV32-BITS-512-NEXT: vslidedown.vi v8, v0, 1
-; RV32-BITS-512-NEXT: vmv.x.s a0, v8
-; RV32-BITS-512-NEXT: andi a1, a0, 1
-; RV32-BITS-512-NEXT: sb a1, 31(sp)
-; RV32-BITS-512-NEXT: srli a1, a0, 31
-; RV32-BITS-512-NEXT: sb a1, 0(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 30
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 30(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 29
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 29(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 28
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 28(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 27
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 27(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 26
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 26(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 25
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 25(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 24
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 24(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 23
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 23(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 22
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 22(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 21
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 21(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 20
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 20(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 19
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 19(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 18
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 18(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 17
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 17(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 16
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 16(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 15
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 15(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 14
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 14(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 13
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 13(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 12
+; RV32-BITS-512-NEXT: vmv.x.s a1, v8
+; RV32-BITS-512-NEXT: andi a2, a1, 1
+; RV32-BITS-512-NEXT: sb a2, 31(sp)
+; RV32-BITS-512-NEXT: srli a2, a1, 31
+; RV32-BITS-512-NEXT: sb a2, 0(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 30
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 30(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 29
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 29(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 28
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 28(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 27
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 27(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 26
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 26(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 25
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 25(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 24
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 24(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 23
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 23(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 22
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 22(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 21
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 21(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 20
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 20(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 19
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 19(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 18
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 18(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 17
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 17(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 16
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 16(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 15
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 15(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 14
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 14(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 13
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 13(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 12
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 12(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 11
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 11(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 10
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 10(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 9
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 9(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 8
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 8(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 7
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 7(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 6
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 6(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 5
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 5(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 4
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 4(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 3
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 3(sp)
+; RV32-BITS-512-NEXT: slli a2, a1, 2
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: sb a2, 2(sp)
+; RV32-BITS-512-NEXT: slli a1, a1, 1
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 12(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 11
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 11(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 10
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 10(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 9
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 9(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 8
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 8(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 7
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 7(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 6
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 6(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 5
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 5(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 4
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 4(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 3
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 3(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 2
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 2(sp)
-; RV32-BITS-512-NEXT: slli a0, a0, 1
-; RV32-BITS-512-NEXT: srli a0, a0, 31
-; RV32-BITS-512-NEXT: sb a0, 1(sp)
-; RV32-BITS-512-NEXT: li a0, 64
+; RV32-BITS-512-NEXT: sb a1, 1(sp)
; RV32-BITS-512-NEXT: mv a1, sp
; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-BITS-512-NEXT: vle8.v v8, (a1)
@@ -2033,198 +2065,200 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 128
; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -64
-; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT: andi a1, a0, 1
+; RV64-BITS-UNKNOWN-NEXT: li a0, 64
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vfirst.m a1, v0
+; RV64-BITS-UNKNOWN-NEXT: seqz a1, a1
; RV64-BITS-UNKNOWN-NEXT: sb a1, 63(sp)
-; RV64-BITS-UNKNOWN-NEXT: srliw a1, a0, 31
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 32(sp)
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a0, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 0(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 62(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 61(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 60(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 59(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 58(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 57(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 56(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 55(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 54(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 53(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 52(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 51(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 50(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 49(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 48
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 48(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 47
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 47(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 46
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 46(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 45
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 45(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 44
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 44(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 43
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 43(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 42
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 42(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 41
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 41(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 40
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 40(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 39
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 39(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 38
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 38(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 37
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 37(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 36
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 36(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 35
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 35(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 34
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 34(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 33
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 33(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 31
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 31(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 30
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 30(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 29
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 29(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 28
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 28(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 27
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 27(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 26
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 26(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 25
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 25(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 24
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 24(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 23
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 23(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 22
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 22(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 21
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 21(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 20
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 20(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 19
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 19(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 18
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 18(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 17
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 17(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 16
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 16(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 15
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 15(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 14
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 13
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 12
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 12(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 11
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 10
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 9
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 8
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 8(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 7
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 7(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 6
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 6(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 5
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 5(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 4
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 4(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 3
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 3(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 2
+; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0
+; RV64-BITS-UNKNOWN-NEXT: srliw a2, a1, 31
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 32(sp)
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 0(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 62
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 62(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 61
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 61(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 60
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 60(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 59
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 59(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 58
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 58(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 57
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 57(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 56
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 56(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 55
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 55(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 54
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 54(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 53
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 53(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 52
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 52(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 51
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 51(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 50
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 50(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 49
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 49(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 48
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 48(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 47
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 47(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 46
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 46(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 45
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 45(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 44
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 44(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 43
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 43(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 42
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 42(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 41
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 41(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 40
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 40(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 39
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 39(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 38
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 38(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 37
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 37(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 36
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 36(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 35
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 35(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 34
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 34(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 33
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 33(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 31
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 31(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 30
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 30(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 29
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 29(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 28
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 28(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 27
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 27(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 26
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 26(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 25
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 25(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 24
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 24(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 23
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 23(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 22
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 22(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 21
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 21(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 20
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 20(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 19
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 19(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 18
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 18(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 17
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 17(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 16
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 16(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 15
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 15(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 14
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 14(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 13
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 13(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 12
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 12(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 11
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 11(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 10
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 10(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 9
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 9(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 8
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 8(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 7
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 7(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 6
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 6(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 5
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 5(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 4
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 4(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 3
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 3(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 2
+; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT: sb a2, 2(sp)
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a1, 1
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 2(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a0, 1(sp)
-; RV64-BITS-UNKNOWN-NEXT: li a0, 64
+; RV64-BITS-UNKNOWN-NEXT: sb a1, 1(sp)
; RV64-BITS-UNKNOWN-NEXT: mv a1, sp
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a1)
@@ -2247,198 +2281,200 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; RV64-BITS-256-NEXT: addi s0, sp, 128
; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0
; RV64-BITS-256-NEXT: andi sp, sp, -64
-; RV64-BITS-256-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-256-NEXT: vmv.x.s a0, v0
-; RV64-BITS-256-NEXT: andi a1, a0, 1
+; RV64-BITS-256-NEXT: li a0, 64
+; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV64-BITS-256-NEXT: vfirst.m a1, v0
+; RV64-BITS-256-NEXT: seqz a1, a1
; RV64-BITS-256-NEXT: sb a1, 63(sp)
-; RV64-BITS-256-NEXT: srliw a1, a0, 31
-; RV64-BITS-256-NEXT: sb a1, 32(sp)
-; RV64-BITS-256-NEXT: srli a1, a0, 63
-; RV64-BITS-256-NEXT: sb a1, 0(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 62
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 62(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 61
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 61(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 60
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 60(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 59
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 59(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 58
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 58(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 57
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 57(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 56
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 56(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 55
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 55(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 54
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 54(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 53
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 53(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 52
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 52(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 51
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 51(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 50
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 50(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 49
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 49(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 48
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 48(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 47
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 47(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 46
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 46(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 45
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 45(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 44
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 44(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 43
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 43(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 42
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 42(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 41
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 41(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 40
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 40(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 39
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 39(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 38
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 38(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 37
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 37(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 36
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 36(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 35
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 35(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 34
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 34(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 33
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 33(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 31
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 31(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 30
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 30(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 29
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 29(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 28
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 28(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 27
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 27(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 26
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 26(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 25
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 25(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 24
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 24(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 23
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 23(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 22
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 22(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 21
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 21(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 20
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 20(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 19
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 19(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 18
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 18(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 17
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 17(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 16
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 16(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 15
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 15(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 14
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 14(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 13
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 13(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 12
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 12(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 11
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 11(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 10
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 10(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 9
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 9(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 8
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 8(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 7
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 7(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 6
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 6(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 5
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 5(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 4
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 4(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 3
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 3(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 2
+; RV64-BITS-256-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV64-BITS-256-NEXT: vmv.x.s a1, v0
+; RV64-BITS-256-NEXT: srliw a2, a1, 31
+; RV64-BITS-256-NEXT: sb a2, 32(sp)
+; RV64-BITS-256-NEXT: srli a2, a1, 63
+; RV64-BITS-256-NEXT: sb a2, 0(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 62
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 62(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 61
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 61(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 60
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 60(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 59
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 59(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 58
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 58(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 57
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 57(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 56
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 56(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 55
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 55(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 54
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 54(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 53
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 53(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 52
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 52(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 51
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 51(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 50
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 50(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 49
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 49(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 48
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 48(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 47
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 47(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 46
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 46(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 45
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 45(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 44
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 44(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 43
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 43(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 42
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 42(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 41
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 41(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 40
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 40(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 39
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 39(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 38
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 38(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 37
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 37(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 36
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 36(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 35
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 35(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 34
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 34(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 33
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 33(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 31
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 31(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 30
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 30(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 29
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 29(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 28
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 28(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 27
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 27(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 26
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 26(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 25
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 25(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 24
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 24(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 23
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 23(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 22
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 22(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 21
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 21(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 20
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 20(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 19
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 19(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 18
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 18(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 17
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 17(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 16
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 16(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 15
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 15(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 14
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 14(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 13
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 13(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 12
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 12(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 11
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 11(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 10
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 10(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 9
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 9(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 8
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 8(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 7
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 7(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 6
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 6(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 5
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 5(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 4
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 4(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 3
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 3(sp)
+; RV64-BITS-256-NEXT: slli a2, a1, 2
+; RV64-BITS-256-NEXT: srli a2, a2, 63
+; RV64-BITS-256-NEXT: sb a2, 2(sp)
+; RV64-BITS-256-NEXT: slli a1, a1, 1
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 2(sp)
-; RV64-BITS-256-NEXT: slli a0, a0, 1
-; RV64-BITS-256-NEXT: srli a0, a0, 63
-; RV64-BITS-256-NEXT: sb a0, 1(sp)
-; RV64-BITS-256-NEXT: li a0, 64
+; RV64-BITS-256-NEXT: sb a1, 1(sp)
; RV64-BITS-256-NEXT: mv a1, sp
; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64-BITS-256-NEXT: vle8.v v8, (a1)
@@ -2461,198 +2497,200 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; RV64-BITS-512-NEXT: addi s0, sp, 128
; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0
; RV64-BITS-512-NEXT: andi sp, sp, -64
-; RV64-BITS-512-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-512-NEXT: vmv.x.s a0, v0
-; RV64-BITS-512-NEXT: andi a1, a0, 1
+; RV64-BITS-512-NEXT: li a0, 64
+; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV64-BITS-512-NEXT: vfirst.m a1, v0
+; RV64-BITS-512-NEXT: seqz a1, a1
; RV64-BITS-512-NEXT: sb a1, 63(sp)
-; RV64-BITS-512-NEXT: srliw a1, a0, 31
-; RV64-BITS-512-NEXT: sb a1, 32(sp)
-; RV64-BITS-512-NEXT: srli a1, a0, 63
-; RV64-BITS-512-NEXT: sb a1, 0(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 62
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 62(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 61
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 61(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 60
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 60(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 59
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 59(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 58
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 58(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 57
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 57(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 56
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 56(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 55
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 55(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 54
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 54(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 53
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 53(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 52
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 52(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 51
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 51(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 50
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 50(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 49
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 49(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 48
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 48(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 47
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 47(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 46
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 46(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 45
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 45(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 44
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 44(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 43
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 43(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 42
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 42(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 41
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 41(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 40
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 40(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 39
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 39(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 38
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 38(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 37
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 37(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 36
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 36(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 35
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 35(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 34
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 34(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 33
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 33(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 31
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 31(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 30
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 30(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 29
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 29(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 28
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 28(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 27
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 27(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 26
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 26(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 25
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 25(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 24
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 24(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 23
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 23(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 22
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 22(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 21
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 21(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 20
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 20(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 19
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 19(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 18
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 18(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 17
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 17(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 16
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 16(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 15
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 15(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 14
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 14(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 13
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 13(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 12
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 12(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 11
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 11(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 10
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 10(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 9
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 9(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 8
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 8(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 7
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 7(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 6
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 6(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 5
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 5(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 4
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 4(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 3
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 3(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 2
+; RV64-BITS-512-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV64-BITS-512-NEXT: vmv.x.s a1, v0
+; RV64-BITS-512-NEXT: srliw a2, a1, 31
+; RV64-BITS-512-NEXT: sb a2, 32(sp)
+; RV64-BITS-512-NEXT: srli a2, a1, 63
+; RV64-BITS-512-NEXT: sb a2, 0(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 62
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 62(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 61
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 61(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 60
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 60(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 59
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 59(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 58
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 58(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 57
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 57(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 56
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 56(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 55
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 55(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 54
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 54(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 53
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 53(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 52
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 52(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 51
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 51(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 50
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 50(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 49
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 49(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 48
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 48(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 47
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 47(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 46
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 46(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 45
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 45(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 44
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 44(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 43
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 43(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 42
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 42(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 41
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 41(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 40
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 40(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 39
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 39(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 38
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 38(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 37
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 37(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 36
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 36(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 35
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 35(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 34
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 34(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 33
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 33(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 31
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 31(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 30
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 30(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 29
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 29(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 28
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 28(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 27
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 27(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 26
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 26(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 25
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 25(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 24
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 24(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 23
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 23(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 22
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 22(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 21
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 21(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 20
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 20(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 19
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 19(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 18
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 18(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 17
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 17(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 16
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 16(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 15
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 15(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 14
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 14(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 13
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 13(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 12
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 12(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 11
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 11(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 10
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 10(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 9
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 9(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 8
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 8(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 7
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 7(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 6
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 6(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 5
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 5(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 4
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 4(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 3
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 3(sp)
+; RV64-BITS-512-NEXT: slli a2, a1, 2
+; RV64-BITS-512-NEXT: srli a2, a2, 63
+; RV64-BITS-512-NEXT: sb a2, 2(sp)
+; RV64-BITS-512-NEXT: slli a1, a1, 1
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 2(sp)
-; RV64-BITS-512-NEXT: slli a0, a0, 1
-; RV64-BITS-512-NEXT: srli a0, a0, 63
-; RV64-BITS-512-NEXT: sb a0, 1(sp)
-; RV64-BITS-512-NEXT: li a0, 64
+; RV64-BITS-512-NEXT: sb a1, 1(sp)
; RV64-BITS-512-NEXT: mv a1, sp
; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64-BITS-512-NEXT: vle8.v v8, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index 2035a578c8023..ded6df1a77ef9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -453,10 +453,9 @@ define i1 @extractelt_v1i1_idx0(ptr %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmseq.vi v8, v8, 0
+; CHECK-NEXT: vfirst.m a0, v8
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <1 x i8>, ptr %x
%b = icmp eq <1 x i8> %a, zeroinitializer
@@ -469,10 +468,9 @@ define i1 @extractelt_v2i1_idx0(ptr %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmseq.vi v8, v8, 0
+; CHECK-NEXT: vfirst.m a0, v8
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <2 x i8>, ptr %x
%b = icmp eq <2 x i8> %a, zeroinitializer
@@ -485,10 +483,9 @@ define i1 @extractelt_v4i1_idx0(ptr %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmseq.vi v8, v8, 0
+; CHECK-NEXT: vfirst.m a0, v8
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <4 x i8>, ptr %x
%b = icmp eq <4 x i8> %a, zeroinitializer
@@ -502,8 +499,8 @@ define i1 @extractelt_v8i1_idx0(ptr %x) nounwind {
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmseq.vi v8, v8, 0
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vfirst.m a0, v8
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <8 x i8>, ptr %x
%b = icmp eq <8 x i8> %a, zeroinitializer
@@ -517,9 +514,8 @@ define i1 @extractelt_v16i1_idx0(ptr %x) nounwind {
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmseq.vi v8, v8, 0
-; CHECK-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vfirst.m a0, v8
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = icmp eq <16 x i8> %a, zeroinitializer
@@ -534,9 +530,8 @@ define i1 @extractelt_v32i1_idx0(ptr %x) nounwind {
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmseq.vi v10, v8, 0
-; CHECK-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.x.s a0, v10
-; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vfirst.m a0, v10
+; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = icmp eq <32 x i8> %a, zeroinitializer
@@ -545,49 +540,15 @@ define i1 @extractelt_v32i1_idx0(ptr %x) nounwind {
}
define i1 @extractelt_v64i1_idx0(ptr %x) nounwind {
-; RV32-LABEL: extractelt_v64i1_idx0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 64
-; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
-; RV32-NEXT: vmseq.vi v12, v8, 0
-; RV32-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: ret
-;
-; RV64-LABEL: extractelt_v64i1_idx0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 64
-; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; RV64-NEXT: vle8.v v8, (a0)
-; RV64-NEXT: vmseq.vi v12, v8, 0
-; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a0, v12
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: ret
-;
-; RV32ZBS-LABEL: extractelt_v64i1_idx0:
-; RV32ZBS: # %bb.0:
-; RV32ZBS-NEXT: li a1, 64
-; RV32ZBS-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; RV32ZBS-NEXT: vle8.v v8, (a0)
-; RV32ZBS-NEXT: vmseq.vi v12, v8, 0
-; RV32ZBS-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV32ZBS-NEXT: vmv.x.s a0, v12
-; RV32ZBS-NEXT: andi a0, a0, 1
-; RV32ZBS-NEXT: ret
-;
-; RV64ZBS-LABEL: extractelt_v64i1_idx0:
-; RV64ZBS: # %bb.0:
-; RV64ZBS-NEXT: li a1, 64
-; RV64ZBS-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; RV64ZBS-NEXT: vle8.v v8, (a0)
-; RV64ZBS-NEXT: vmseq.vi v12, v8, 0
-; RV64ZBS-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64ZBS-NEXT: vmv.x.s a0, v12
-; RV64ZBS-NEXT: andi a0, a0, 1
-; RV64ZBS-NEXT: ret
+; CHECK-LABEL: extractelt_v64i1_idx0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 64
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vmseq.vi v12, v8, 0
+; CHECK-NEXT: vfirst.m a0, v12
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
%a = load <64 x i8>, ptr %x
%b = icmp eq <64 x i8> %a, zeroinitializer
%c = extractelement <64 x i1> %b, i64 0
@@ -595,49 +556,15 @@ define i1 @extractelt_v64i1_idx0(ptr %x) nounwind {
}
define i1 @extractelt_v128i1_idx0(ptr %x) nounwind {
-; RV32-LABEL: extractelt_v128i1_idx0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 128
-; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
-; RV32-NEXT: vmseq.vi v16, v8, 0
-; RV32-NEXT: vsetivli zero, 0, e32, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v16
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: ret
-;
-; RV64-LABEL: extractelt_v128i1_idx0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 128
-; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; RV64-NEXT: vle8.v v8, (a0)
-; RV64-NEXT: vmseq.vi v16, v8, 0
-; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a0, v16
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: ret
-;
-; RV32ZBS-LABEL: extractelt_v128i1_idx0:
-; RV32ZBS: # %bb.0:
-; RV32ZBS-NEXT: li a1, 128
-; RV32ZBS-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; RV32ZBS-NEXT: vle8.v v8, (a0)
-; RV32ZBS-NEXT: vmseq.vi v16, v8, 0
-; RV32ZBS-NEXT: vsetivli zero, 0, e32, m1, ta, ma
-; RV32ZBS-NEXT: vmv.x.s a0, v16
-; RV32ZBS-NEXT: andi a0, a0, 1
-; RV32ZBS-NEXT: ret
-;
-; RV64ZBS-LABEL: extractelt_v128i1_idx0:
-; RV64ZBS: # %bb.0:
-; RV64ZBS-NEXT: li a1, 128
-; RV64ZBS-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; RV64ZBS-NEXT: vle8.v v8, (a0)
-; RV64ZBS-NEXT: vmseq.vi v16, v8, 0
-; RV64ZBS-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64ZBS-NEXT: vmv.x.s a0, v16
-; RV64ZBS-NEXT: andi a0, a0, 1
-; RV64ZBS-NEXT: ret
+; CHECK-LABEL: extractelt_v128i1_idx0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vmseq.vi v16, v8, 0
+; CHECK-NEXT: vfirst.m a0, v16
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
%a = load <128 x i8>, ptr %x
%b = icmp eq <128 x i8> %a, zeroinitializer
%c = extractelement <128 x i1> %b, i64 0
@@ -645,49 +572,15 @@ define i1 @extractelt_v128i1_idx0(ptr %x) nounwind {
}
define i1 @extractelt_v256i1_idx0(ptr %x) nounwind {
-; RV32-LABEL: extractelt_v256i1_idx0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 128
-; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
-; RV32-NEXT: vmseq.vi v16, v8, 0
-; RV32-NEXT: vsetivli zero, 0, e32, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v16
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: ret
-;
-; RV64-LABEL: extractelt_v256i1_idx0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 128
-; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; RV64-NEXT: vle8.v v8, (a0)
-; RV64-NEXT: vmseq.vi v16, v8, 0
-; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a0, v16
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: ret
-;
-; RV32ZBS-LABEL: extractelt_v256i1_idx0:
-; RV32ZBS: # %bb.0:
-; RV32ZBS-NEXT: li a1, 128
-; RV32ZBS-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; RV32ZBS-NEXT: vle8.v v8, (a0)
-; RV32ZBS-NEXT: vmseq.vi v16, v8, 0
-; RV32ZBS-NEXT: vsetivli zero, 0, e32, m1, ta, ma
-; RV32ZBS-NEXT: vmv.x.s a0, v16
-; RV32ZBS-NEXT: andi a0, a0, 1
-; RV32ZBS-NEXT: ret
-;
-; RV64ZBS-LABEL: extractelt_v256i1_idx0:
-; RV64ZBS: # %bb.0:
-; RV64ZBS-NEXT: li a1, 128
-; RV64ZBS-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; RV64ZBS-NEXT: vle8.v v8, (a0)
-; RV64ZBS-NEXT: vmseq.vi v16, v8, 0
-; RV64ZBS-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64ZBS-NEXT: vmv.x.s a0, v16
-; RV64ZBS-NEXT: andi a0, a0, 1
-; RV64ZBS-NEXT: ret
+; CHECK-LABEL: extractelt_v256i1_idx0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vmseq.vi v16, v8, 0
+; CHECK-NEXT: vfirst.m a0, v16
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
%a = load <256 x i8>, ptr %x
%b = icmp eq <256 x i8> %a, zeroinitializer
%c = extractelement <256 x i1> %b, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index df94ead917a73..11b059a052b10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -35,11 +35,8 @@ define <1 x i8> @mgather_v1i8(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i8> %passthru)
; RV64ZVE32F-LABEL: mgather_v1i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v9, 0
-; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB0_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vlse8.v v8, (a0), zero
@@ -875,11 +872,8 @@ define <1 x i16> @mgather_v1i16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i16> %passthr
; RV64ZVE32F-LABEL: mgather_v1i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v9, 0
-; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB13_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB13_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
@@ -2081,11 +2075,8 @@ define <1 x i32> @mgather_v1i32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i32> %passthr
; RV64ZVE32F-LABEL: mgather_v1i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v9, 0
-; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB27_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB27_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
@@ -3631,11 +3622,8 @@ define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthr
; RV32ZVE32F-LABEL: mgather_v1i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT: vmv.v.i v9, 0
-; RV32ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32ZVE32F-NEXT: vmv.x.s a2, v9
-; RV32ZVE32F-NEXT: andi a2, a2, 1
-; RV32ZVE32F-NEXT: beqz a2, .LBB42_2
+; RV32ZVE32F-NEXT: vfirst.m a2, v0
+; RV32ZVE32F-NEXT: bnez a2, .LBB42_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3647,11 +3635,8 @@ define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthr
; RV64ZVE32F-LABEL: mgather_v1i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v8, 0
-; RV64ZVE32F-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB42_2
+; RV64ZVE32F-NEXT: vfirst.m a2, v0
+; RV64ZVE32F-NEXT: bnez a2, .LBB42_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: .LBB42_2: # %else
@@ -7164,11 +7149,8 @@ define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passt
; RV64ZVE32F-LABEL: mgather_v1f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v9, 0
-; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB58_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB58_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
@@ -8134,11 +8116,8 @@ define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %pas
; RV64ZVE32F-LABEL: mgather_v1f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v9, 0
-; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB68_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB68_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
@@ -9558,11 +9537,8 @@ define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %p
; RV32ZVE32F-LABEL: mgather_v1f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT: vmv.v.i v9, 0
-; RV32ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32ZVE32F-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-NEXT: andi a0, a0, 1
-; RV32ZVE32F-NEXT: beqz a0, .LBB81_2
+; RV32ZVE32F-NEXT: vfirst.m a0, v0
+; RV32ZVE32F-NEXT: bnez a0, .LBB81_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -9573,11 +9549,8 @@ define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %p
; RV64ZVE32F-LABEL: mgather_v1f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v8, 0
-; RV64ZVE32F-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v8
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB81_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB81_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: fld fa0, 0(a0)
; RV64ZVE32F-NEXT: .LBB81_2: # %else
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 494f556aadcd7..aecbd1fbc5f15 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -32,11 +32,8 @@ define void @mscatter_v1i8(<1 x i8> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV64ZVE32F-LABEL: mscatter_v1i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v9, 0
-; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB0_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
@@ -657,11 +654,8 @@ define void @mscatter_v1i16(<1 x i16> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV64ZVE32F-LABEL: mscatter_v1i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v9, 0
-; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB10_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB10_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
@@ -1637,11 +1631,8 @@ define void @mscatter_v1i32(<1 x i32> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV64ZVE32F-LABEL: mscatter_v1i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v9, 0
-; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB22_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB22_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
@@ -2942,11 +2933,8 @@ define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32ZVE32F-LABEL: mscatter_v1i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT: vmv.v.i v9, 0
-; RV32ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32ZVE32F-NEXT: vmv.x.s a2, v9
-; RV32ZVE32F-NEXT: andi a2, a2, 1
-; RV32ZVE32F-NEXT: beqz a2, .LBB36_2
+; RV32ZVE32F-NEXT: vfirst.m a2, v0
+; RV32ZVE32F-NEXT: bnez a2, .LBB36_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
@@ -2958,11 +2946,8 @@ define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV64ZVE32F-LABEL: mscatter_v1i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v8, 0
-; RV64ZVE32F-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB36_2
+; RV64ZVE32F-NEXT: vfirst.m a2, v0
+; RV64ZVE32F-NEXT: bnez a2, .LBB36_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: sd a0, 0(a1)
; RV64ZVE32F-NEXT: .LBB36_2: # %else
@@ -6089,11 +6074,8 @@ define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV64ZVE32F-LABEL: mscatter_v1f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v9, 0
-; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB52_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB52_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
@@ -6942,11 +6924,8 @@ define void @mscatter_v1f32(<1 x float> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV64ZVE32F-LABEL: mscatter_v1f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v9, 0
-; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB62_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB62_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
@@ -8191,11 +8170,8 @@ define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32ZVE32F-LABEL: mscatter_v1f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT: vmv.v.i v9, 0
-; RV32ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32ZVE32F-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-NEXT: andi a0, a0, 1
-; RV32ZVE32F-NEXT: beqz a0, .LBB75_2
+; RV32ZVE32F-NEXT: vfirst.m a0, v0
+; RV32ZVE32F-NEXT: bnez a0, .LBB75_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -8206,11 +8182,8 @@ define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV64ZVE32F-LABEL: mscatter_v1f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.i v8, 0
-; RV64ZVE32F-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64ZVE32F-NEXT: vmv.x.s a1, v8
-; RV64ZVE32F-NEXT: andi a1, a1, 1
-; RV64ZVE32F-NEXT: beqz a1, .LBB75_2
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB75_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: .LBB75_2: # %else
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
index 28e4c74e1c324..ef0f607de26e2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
@@ -7,45 +7,13 @@
declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>)
define signext i1 @vreduce_or_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_or_v1i1:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT: slli a0, a0, 31
-; LMULMAX1-RV32-NEXT: srai a0, a0, 31
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_or_v1i1:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT: slli a0, a0, 63
-; LMULMAX1-RV64-NEXT: srai a0, a0, 63
-; LMULMAX1-RV64-NEXT: ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_or_v1i1:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT: slli a0, a0, 31
-; LMULMAX8-RV32-NEXT: srai a0, a0, 31
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_or_v1i1:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT: slli a0, a0, 63
-; LMULMAX8-RV64-NEXT: srai a0, a0, 63
-; LMULMAX8-RV64-NEXT: ret
+; CHECK-LABEL: vreduce_or_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vfirst.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
%red = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %v)
ret i1 %red
}
@@ -53,45 +21,13 @@ define signext i1 @vreduce_or_v1i1(<1 x i1> %v) {
declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>)
define signext i1 @vreduce_xor_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_xor_v1i1:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT: slli a0, a0, 31
-; LMULMAX1-RV32-NEXT: srai a0, a0, 31
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_xor_v1i1:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT: slli a0, a0, 63
-; LMULMAX1-RV64-NEXT: srai a0, a0, 63
-; LMULMAX1-RV64-NEXT: ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_xor_v1i1:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT: slli a0, a0, 31
-; LMULMAX8-RV32-NEXT: srai a0, a0, 31
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_xor_v1i1:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT: slli a0, a0, 63
-; LMULMAX8-RV64-NEXT: srai a0, a0, 63
-; LMULMAX8-RV64-NEXT: ret
+; CHECK-LABEL: vreduce_xor_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vfirst.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
%red = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %v)
ret i1 %red
}
@@ -99,45 +35,13 @@ define signext i1 @vreduce_xor_v1i1(<1 x i1> %v) {
declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>)
define signext i1 @vreduce_and_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_and_v1i1:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT: slli a0, a0, 31
-; LMULMAX1-RV32-NEXT: srai a0, a0, 31
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_and_v1i1:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT: slli a0, a0, 63
-; LMULMAX1-RV64-NEXT: srai a0, a0, 63
-; LMULMAX1-RV64-NEXT: ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_and_v1i1:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT: slli a0, a0, 31
-; LMULMAX8-RV32-NEXT: srai a0, a0, 31
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_and_v1i1:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT: slli a0, a0, 63
-; LMULMAX8-RV64-NEXT: srai a0, a0, 63
-; LMULMAX8-RV64-NEXT: ret
+; CHECK-LABEL: vreduce_and_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vfirst.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
%red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
ret i1 %red
}
@@ -145,45 +49,13 @@ define signext i1 @vreduce_and_v1i1(<1 x i1> %v) {
declare i1 @llvm.vector.reduce.umax.v1i1(<1 x i1>)
define signext i1 @vreduce_umax_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_umax_v1i1:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT: slli a0, a0, 31
-; LMULMAX1-RV32-NEXT: srai a0, a0, 31
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_umax_v1i1:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT: slli a0, a0, 63
-; LMULMAX1-RV64-NEXT: srai a0, a0, 63
-; LMULMAX1-RV64-NEXT: ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_umax_v1i1:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT: slli a0, a0, 31
-; LMULMAX8-RV32-NEXT: srai a0, a0, 31
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_umax_v1i1:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT: slli a0, a0, 63
-; LMULMAX8-RV64-NEXT: srai a0, a0, 63
-; LMULMAX8-RV64-NEXT: ret
+; CHECK-LABEL: vreduce_umax_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vfirst.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
%red = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %v)
ret i1 %red
}
@@ -191,45 +63,13 @@ define signext i1 @vreduce_umax_v1i1(<1 x i1> %v) {
declare i1 @llvm.vector.reduce.smax.v1i1(<1 x i1>)
define signext i1 @vreduce_smax_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_smax_v1i1:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT: slli a0, a0, 31
-; LMULMAX1-RV32-NEXT: srai a0, a0, 31
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_smax_v1i1:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT: slli a0, a0, 63
-; LMULMAX1-RV64-NEXT: srai a0, a0, 63
-; LMULMAX1-RV64-NEXT: ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_smax_v1i1:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT: slli a0, a0, 31
-; LMULMAX8-RV32-NEXT: srai a0, a0, 31
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_smax_v1i1:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT: slli a0, a0, 63
-; LMULMAX8-RV64-NEXT: srai a0, a0, 63
-; LMULMAX8-RV64-NEXT: ret
+; CHECK-LABEL: vreduce_smax_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vfirst.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
%red = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> %v)
ret i1 %red
}
@@ -237,45 +77,13 @@ define signext i1 @vreduce_smax_v1i1(<1 x i1> %v) {
declare i1 @llvm.vector.reduce.umin.v1i1(<1 x i1>)
define signext i1 @vreduce_umin_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_umin_v1i1:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT: slli a0, a0, 31
-; LMULMAX1-RV32-NEXT: srai a0, a0, 31
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_umin_v1i1:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT: slli a0, a0, 63
-; LMULMAX1-RV64-NEXT: srai a0, a0, 63
-; LMULMAX1-RV64-NEXT: ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_umin_v1i1:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT: slli a0, a0, 31
-; LMULMAX8-RV32-NEXT: srai a0, a0, 31
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_umin_v1i1:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT: slli a0, a0, 63
-; LMULMAX8-RV64-NEXT: srai a0, a0, 63
-; LMULMAX8-RV64-NEXT: ret
+; CHECK-LABEL: vreduce_umin_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vfirst.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
%red = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> %v)
ret i1 %red
}
@@ -283,45 +91,13 @@ define signext i1 @vreduce_umin_v1i1(<1 x i1> %v) {
declare i1 @llvm.vector.reduce.smin.v1i1(<1 x i1>)
define signext i1 @vreduce_smin_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_smin_v1i1:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT: slli a0, a0, 31
-; LMULMAX1-RV32-NEXT: srai a0, a0, 31
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_smin_v1i1:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT: slli a0, a0, 63
-; LMULMAX1-RV64-NEXT: srai a0, a0, 63
-; LMULMAX1-RV64-NEXT: ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_smin_v1i1:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT: slli a0, a0, 31
-; LMULMAX8-RV32-NEXT: srai a0, a0, 31
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_smin_v1i1:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT: slli a0, a0, 63
-; LMULMAX8-RV64-NEXT: srai a0, a0, 63
-; LMULMAX8-RV64-NEXT: ret
+; CHECK-LABEL: vreduce_smin_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vfirst.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
%red = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> %v)
ret i1 %red
}
@@ -1223,45 +999,13 @@ define signext i1 @vreduce_smin_v64i1(<64 x i1> %v) {
declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1>)
define signext i1 @vreduce_add_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_add_v1i1:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT: slli a0, a0, 31
-; LMULMAX1-RV32-NEXT: srai a0, a0, 31
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_add_v1i1:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT: slli a0, a0, 63
-; LMULMAX1-RV64-NEXT: srai a0, a0, 63
-; LMULMAX1-RV64-NEXT: ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_add_v1i1:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT: slli a0, a0, 31
-; LMULMAX8-RV32-NEXT: srai a0, a0, 31
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_add_v1i1:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT: vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT: slli a0, a0, 63
-; LMULMAX8-RV64-NEXT: srai a0, a0, 63
-; LMULMAX8-RV64-NEXT: ret
+; CHECK-LABEL: vreduce_add_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vfirst.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
%red = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %v)
ret i1 %red
}
More information about the llvm-commits
mailing list