[llvm] db3f324 - [RISCV] Use vfirst.m to extract the first element from mask vector.

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 2 19:24:27 PST 2023


Author: jacquesguan
Date: 2023-01-03T11:24:18+08:00
New Revision: db3f3243bb85253eaf0e483452fdbfd5baf7ffd1

URL: https://github.com/llvm/llvm-project/commit/db3f3243bb85253eaf0e483452fdbfd5baf7ffd1
DIFF: https://github.com/llvm/llvm-project/commit/db3f3243bb85253eaf0e483452fdbfd5baf7ffd1.diff

LOG: [RISCV] Use vfirst.m to extract the first element from mask vector.

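This patch uses vfirst.m to extract the first bit (element 0) of a mask vector. The extracted bit is computed by comparing the vfirst.m result against zero: element 0 is set exactly when the index of the first set mask bit is 0.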

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D139512

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
    llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
    llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f89fc2bfe6953..55edf98b3de4e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5266,6 +5266,19 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   MVT XLenVT = Subtarget.getXLenVT();
 
   if (VecVT.getVectorElementType() == MVT::i1) {
+    // Use vfirst.m to extract the first bit.
+    if (isNullConstant(Idx)) {
+      MVT ContainerVT = VecVT;
+      if (VecVT.isFixedLengthVector()) {
+        ContainerVT = getContainerForFixedLengthVector(VecVT);
+        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+      }
+      auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+      SDValue Vfirst =
+          DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
+      return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT),
+                          ISD::SETEQ);
+    }
     if (VecVT.isFixedLengthVector()) {
       unsigned NumElts = VecVT.getVectorNumElements();
       if (NumElts >= 8) {

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 7d4baaa141b5a..a20b0891783be 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -301,6 +301,9 @@ enum NodeType : unsigned {
   //  vcpop.m with additional mask and VL operands.
   VCPOP_VL,
 
+  //  vfirst.m with additional mask and VL operands.
+  VFIRST_VL,
+
   // Reads value of CSR.
   // The first operand is a chain pointer. The second specifies address of the
   // required CSR. Two results are produced, the read value and the new chain

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index c6804b6011ebf..e54003e1576c5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -228,6 +228,13 @@ def riscv_vcpop_vl : SDNode<"RISCVISD::VCPOP_VL",
                                                  SDTCisSameNumEltsAs<1, 2>,
                                                  SDTCisVT<3, XLenVT>]>>;
 
+def riscv_vfirst_vl : SDNode<"RISCVISD::VFIRST_VL",
+                            SDTypeProfile<1, 3, [SDTCisVT<0, XLenVT>,
+                                                 SDTCisVec<1>, SDTCisInt<1>,
+                                                 SDTCVecEltisVT<2, i1>,
+                                                 SDTCisSameNumEltsAs<1, 2>,
+                                                 SDTCisVT<3, XLenVT>]>>;
+
 def SDT_RISCVVEXTEND_VL : SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameNumEltsAs<0, 1>,
                                                SDTCisSameNumEltsAs<1, 2>,
@@ -1829,6 +1836,16 @@ foreach mti = AllMasks in {
                                     VLOpFrag)),
             (!cast<Instruction>("PseudoVCPOP_M_" # mti.BX # "_MASK")
                  VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
+
+  // 16.3 vfirst find-first-set mask bit
+  def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
+                                    VLOpFrag)),
+            (!cast<Instruction>("PseudoVFIRST_M_" # mti.BX)
+                 VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+  def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask V0),
+                                    VLOpFrag)),
+            (!cast<Instruction>("PseudoVFIRST_M_" # mti.BX # "_MASK")
+                 VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
 }
 
 } // Predicates = [HasVInstructions]

diff  --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
index df1314070ce72..8089d0adedc94 100644
--- a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
@@ -27,15 +27,10 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan
 ; RV32-NEXT:    vmsne.vi v0, v11, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vmerge.vvm v8, v9, v8, v0
-; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmerge.vim v8, v9, 1, v0
-; RV32-NEXT:    vmv.x.s a1, v8
-; RV32-NEXT:    andi a1, a1, 1
+; RV32-NEXT:    vfirst.m a1, v10
+; RV32-NEXT:    seqz a1, a1
+; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmv.v.x v8, a1
 ; RV32-NEXT:    vmsne.vi v0, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -54,15 +49,10 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan
 ; RV64-NEXT:    vmsne.vi v0, v13, 0
 ; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
-; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; RV64-NEXT:    vmv.v.i v10, 0
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmerge.vim v8, v10, 1, v0
-; RV64-NEXT:    vmv.x.s a1, v8
-; RV64-NEXT:    andi a1, a1, 1
+; RV64-NEXT:    vfirst.m a1, v12
+; RV64-NEXT:    seqz a1, a1
+; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vmv.v.x v8, a1
 ; RV64-NEXT:    vmsne.vi v0, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma

diff  --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index dc3bc4b27abeb..81fdef107d4db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -219,10 +219,9 @@ define i1 @extractelt_nxv1i1_idx0(<vscale x 1 x i8>* %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
   %b = icmp eq <vscale x 1 x i8> %a, zeroinitializer
@@ -235,10 +234,9 @@ define i1 @extractelt_nxv2i1_idx0(<vscale x 2 x i8>* %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 2 x i8>, <vscale x 2 x i8>* %x
   %b = icmp eq <vscale x 2 x i8> %a, zeroinitializer
@@ -251,10 +249,9 @@ define i1 @extractelt_nxv4i1_idx0(<vscale x 4 x i8>* %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 4 x i8>, <vscale x 4 x i8>* %x
   %b = icmp eq <vscale x 4 x i8> %a, zeroinitializer
@@ -267,10 +264,9 @@ define i1 @extractelt_nxv8i1_idx0(<vscale x 8 x i8>* %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 8 x i8>, <vscale x 8 x i8>* %x
   %b = icmp eq <vscale x 8 x i8> %a, zeroinitializer
@@ -283,10 +279,9 @@ define i1 @extractelt_nxv16i1_idx0(<vscale x 16 x i8>* %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v10, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v10
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 16 x i8>, <vscale x 16 x i8>* %x
   %b = icmp eq <vscale x 16 x i8> %a, zeroinitializer
@@ -299,10 +294,9 @@ define i1 @extractelt_nxv32i1_idx0(<vscale x 32 x i8>* %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v12, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v12
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 32 x i8>, <vscale x 32 x i8>* %x
   %b = icmp eq <vscale x 32 x i8> %a, zeroinitializer
@@ -315,10 +309,9 @@ define i1 @extractelt_nxv64i1_idx0(<vscale x 64 x i8>* %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v16, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v16
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 64 x i8>, <vscale x 64 x i8>* %x
   %b = icmp eq <vscale x 64 x i8> %a, zeroinitializer

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
index 3bd69a1388a8f..2d609adaf0e4e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
@@ -17,10 +17,11 @@ define <2 x i1> @reverse_v2i1(<2 x i1> %a) {
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 1
-; CHECK-NEXT:    vmv.x.s a0, v9
-; CHECK-NEXT:    vmv.x.s a1, v8
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a1, v0
+; CHECK-NEXT:    seqz a1, a1
 ; CHECK-NEXT:    vmv.v.x v8, a1
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
 ; CHECK-NEXT:    vmv.s.x v8, a0
@@ -40,9 +41,7 @@ define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    addi a0, sp, 15
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    vslidedown.vi v9, v8, 1
 ; CHECK-NEXT:    addi a0, sp, 14
 ; CHECK-NEXT:    vse8.v v9, (a0)
@@ -53,6 +52,9 @@ define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
 ; CHECK-NEXT:    addi a0, sp, 12
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vfirst.m a1, v0
+; CHECK-NEXT:    seqz a1, a1
+; CHECK-NEXT:    sb a1, 15(sp)
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
@@ -67,10 +69,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV32-BITS-UNKNOWN:       # %bb.0:
 ; RV32-BITS-UNKNOWN-NEXT:    addi sp, sp, -16
 ; RV32-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 16
-; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT:    sb a0, 15(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT:    andi a1, a0, 1
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 15(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 30
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
 ; RV32-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
@@ -93,7 +96,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
 ; RV32-BITS-UNKNOWN-NEXT:    sb a0, 8(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    addi a0, sp, 8
-; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-BITS-UNKNOWN-NEXT:    vle8.v v8, (a0)
 ; RV32-BITS-UNKNOWN-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-UNKNOWN-NEXT:    vmsne.vi v0, v8, 0
@@ -104,10 +106,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV32-BITS-256:       # %bb.0:
 ; RV32-BITS-256-NEXT:    addi sp, sp, -16
 ; RV32-BITS-256-NEXT:    .cfi_def_cfa_offset 16
-; RV32-BITS-256-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
+; RV32-BITS-256-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-256-NEXT:    vfirst.m a0, v0
+; RV32-BITS-256-NEXT:    seqz a0, a0
+; RV32-BITS-256-NEXT:    sb a0, 15(sp)
 ; RV32-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-256-NEXT:    andi a1, a0, 1
-; RV32-BITS-256-NEXT:    sb a1, 15(sp)
 ; RV32-BITS-256-NEXT:    slli a1, a0, 30
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
 ; RV32-BITS-256-NEXT:    sb a1, 14(sp)
@@ -130,7 +133,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV32-BITS-256-NEXT:    srli a0, a0, 31
 ; RV32-BITS-256-NEXT:    sb a0, 8(sp)
 ; RV32-BITS-256-NEXT:    addi a0, sp, 8
-; RV32-BITS-256-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-BITS-256-NEXT:    vle8.v v8, (a0)
 ; RV32-BITS-256-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-256-NEXT:    vmsne.vi v0, v8, 0
@@ -141,10 +143,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV32-BITS-512:       # %bb.0:
 ; RV32-BITS-512-NEXT:    addi sp, sp, -16
 ; RV32-BITS-512-NEXT:    .cfi_def_cfa_offset 16
-; RV32-BITS-512-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
+; RV32-BITS-512-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-512-NEXT:    vfirst.m a0, v0
+; RV32-BITS-512-NEXT:    seqz a0, a0
+; RV32-BITS-512-NEXT:    sb a0, 15(sp)
 ; RV32-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-512-NEXT:    andi a1, a0, 1
-; RV32-BITS-512-NEXT:    sb a1, 15(sp)
 ; RV32-BITS-512-NEXT:    slli a1, a0, 30
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
 ; RV32-BITS-512-NEXT:    sb a1, 14(sp)
@@ -167,7 +170,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV32-BITS-512-NEXT:    srli a0, a0, 31
 ; RV32-BITS-512-NEXT:    sb a0, 8(sp)
 ; RV32-BITS-512-NEXT:    addi a0, sp, 8
-; RV32-BITS-512-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-BITS-512-NEXT:    vle8.v v8, (a0)
 ; RV32-BITS-512-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-512-NEXT:    vmsne.vi v0, v8, 0
@@ -178,10 +180,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV64-BITS-UNKNOWN:       # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT:    addi sp, sp, -16
 ; RV64-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 16
-; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT:    sb a0, 15(sp)
 ; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT:    andi a1, a0, 1
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 15(sp)
 ; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 62
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
 ; RV64-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
@@ -204,7 +207,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 63
 ; RV64-BITS-UNKNOWN-NEXT:    sb a0, 8(sp)
 ; RV64-BITS-UNKNOWN-NEXT:    addi a0, sp, 8
-; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vle8.v v8, (a0)
 ; RV64-BITS-UNKNOWN-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-UNKNOWN-NEXT:    vmsne.vi v0, v8, 0
@@ -215,10 +217,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV64-BITS-256:       # %bb.0:
 ; RV64-BITS-256-NEXT:    addi sp, sp, -16
 ; RV64-BITS-256-NEXT:    .cfi_def_cfa_offset 16
-; RV64-BITS-256-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
+; RV64-BITS-256-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-256-NEXT:    vfirst.m a0, v0
+; RV64-BITS-256-NEXT:    seqz a0, a0
+; RV64-BITS-256-NEXT:    sb a0, 15(sp)
 ; RV64-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-256-NEXT:    andi a1, a0, 1
-; RV64-BITS-256-NEXT:    sb a1, 15(sp)
 ; RV64-BITS-256-NEXT:    slli a1, a0, 62
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
 ; RV64-BITS-256-NEXT:    sb a1, 14(sp)
@@ -241,7 +244,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV64-BITS-256-NEXT:    srli a0, a0, 63
 ; RV64-BITS-256-NEXT:    sb a0, 8(sp)
 ; RV64-BITS-256-NEXT:    addi a0, sp, 8
-; RV64-BITS-256-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-BITS-256-NEXT:    vle8.v v8, (a0)
 ; RV64-BITS-256-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-256-NEXT:    vmsne.vi v0, v8, 0
@@ -252,10 +254,11 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV64-BITS-512:       # %bb.0:
 ; RV64-BITS-512-NEXT:    addi sp, sp, -16
 ; RV64-BITS-512-NEXT:    .cfi_def_cfa_offset 16
-; RV64-BITS-512-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
+; RV64-BITS-512-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-512-NEXT:    vfirst.m a0, v0
+; RV64-BITS-512-NEXT:    seqz a0, a0
+; RV64-BITS-512-NEXT:    sb a0, 15(sp)
 ; RV64-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-512-NEXT:    andi a1, a0, 1
-; RV64-BITS-512-NEXT:    sb a1, 15(sp)
 ; RV64-BITS-512-NEXT:    slli a1, a0, 62
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
 ; RV64-BITS-512-NEXT:    sb a1, 14(sp)
@@ -278,7 +281,6 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV64-BITS-512-NEXT:    srli a0, a0, 63
 ; RV64-BITS-512-NEXT:    sb a0, 8(sp)
 ; RV64-BITS-512-NEXT:    addi a0, sp, 8
-; RV64-BITS-512-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-BITS-512-NEXT:    vle8.v v8, (a0)
 ; RV64-BITS-512-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-512-NEXT:    vmsne.vi v0, v8, 0
@@ -293,10 +295,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
 ; RV32-BITS-UNKNOWN:       # %bb.0:
 ; RV32-BITS-UNKNOWN-NEXT:    addi sp, sp, -16
 ; RV32-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 16
+; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT:    sb a0, 15(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT:    andi a1, a0, 1
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 15(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 30
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
 ; RV32-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
@@ -354,10 +358,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
 ; RV32-BITS-256:       # %bb.0:
 ; RV32-BITS-256-NEXT:    addi sp, sp, -16
 ; RV32-BITS-256-NEXT:    .cfi_def_cfa_offset 16
+; RV32-BITS-256-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-256-NEXT:    vfirst.m a0, v0
+; RV32-BITS-256-NEXT:    seqz a0, a0
+; RV32-BITS-256-NEXT:    sb a0, 15(sp)
 ; RV32-BITS-256-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV32-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-256-NEXT:    andi a1, a0, 1
-; RV32-BITS-256-NEXT:    sb a1, 15(sp)
 ; RV32-BITS-256-NEXT:    slli a1, a0, 30
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
 ; RV32-BITS-256-NEXT:    sb a1, 14(sp)
@@ -415,10 +421,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
 ; RV32-BITS-512:       # %bb.0:
 ; RV32-BITS-512-NEXT:    addi sp, sp, -16
 ; RV32-BITS-512-NEXT:    .cfi_def_cfa_offset 16
+; RV32-BITS-512-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-512-NEXT:    vfirst.m a0, v0
+; RV32-BITS-512-NEXT:    seqz a0, a0
+; RV32-BITS-512-NEXT:    sb a0, 15(sp)
 ; RV32-BITS-512-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV32-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-512-NEXT:    andi a1, a0, 1
-; RV32-BITS-512-NEXT:    sb a1, 15(sp)
 ; RV32-BITS-512-NEXT:    slli a1, a0, 30
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
 ; RV32-BITS-512-NEXT:    sb a1, 14(sp)
@@ -476,10 +484,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
 ; RV64-BITS-UNKNOWN:       # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT:    addi sp, sp, -16
 ; RV64-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 16
+; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT:    sb a0, 15(sp)
 ; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT:    andi a1, a0, 1
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 15(sp)
 ; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 62
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
 ; RV64-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
@@ -537,10 +547,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
 ; RV64-BITS-256:       # %bb.0:
 ; RV64-BITS-256-NEXT:    addi sp, sp, -16
 ; RV64-BITS-256-NEXT:    .cfi_def_cfa_offset 16
+; RV64-BITS-256-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-256-NEXT:    vfirst.m a0, v0
+; RV64-BITS-256-NEXT:    seqz a0, a0
+; RV64-BITS-256-NEXT:    sb a0, 15(sp)
 ; RV64-BITS-256-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV64-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-256-NEXT:    andi a1, a0, 1
-; RV64-BITS-256-NEXT:    sb a1, 15(sp)
 ; RV64-BITS-256-NEXT:    slli a1, a0, 62
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
 ; RV64-BITS-256-NEXT:    sb a1, 14(sp)
@@ -598,10 +610,12 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
 ; RV64-BITS-512:       # %bb.0:
 ; RV64-BITS-512-NEXT:    addi sp, sp, -16
 ; RV64-BITS-512-NEXT:    .cfi_def_cfa_offset 16
+; RV64-BITS-512-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-512-NEXT:    vfirst.m a0, v0
+; RV64-BITS-512-NEXT:    seqz a0, a0
+; RV64-BITS-512-NEXT:    sb a0, 15(sp)
 ; RV64-BITS-512-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV64-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-512-NEXT:    andi a1, a0, 1
-; RV64-BITS-512-NEXT:    sb a1, 15(sp)
 ; RV64-BITS-512-NEXT:    slli a1, a0, 62
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
 ; RV64-BITS-512-NEXT:    sb a1, 14(sp)
@@ -670,103 +684,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
 ; RV32-BITS-UNKNOWN-NEXT:    addi s0, sp, 64
 ; RV32-BITS-UNKNOWN-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-BITS-UNKNOWN-NEXT:    andi sp, sp, -32
-; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT:    andi a1, a0, 1
+; RV32-BITS-UNKNOWN-NEXT:    li a0, 32
+; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a1, v0
+; RV32-BITS-UNKNOWN-NEXT:    seqz a1, a1
 ; RV32-BITS-UNKNOWN-NEXT:    sb a1, 31(sp)
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a0, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 0(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 30
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 30(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 29
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 29(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 28
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 28(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 27
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 27(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 26
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 26(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 25
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 25(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 24
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 24(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 23
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 23(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 22
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 22(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 21
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 21(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 20
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 20(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 19
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 19(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 18
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 18(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 17
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 17(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 16
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 16(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 15
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 15(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 14
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 13
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 12
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 12(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 11
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 10
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 9
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 8
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 8(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 7
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 7(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 6
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 6(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 5
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 5(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 4
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 4(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 3
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 3(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 2
+; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v0
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 0(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 30
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 30(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 29
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 29(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 28
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 28(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 27
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 27(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 26
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 26(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 25
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 25(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 24
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 24(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 23
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 23(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 22
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 22(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 21
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 21(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 20
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 20(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 19
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 19(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 18
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 18(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 17
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 17(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 16
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 16(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 15
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 15(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 14
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 14(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 13
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 13(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 12
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 12(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 11
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 11(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 10
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 10(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 9
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 9(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 8
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 8(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 7
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 7(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 6
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 6(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 5
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 5(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 4
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 4(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 3
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 3(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 2
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 2(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a1, 1
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 2(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a0, a0, 1
-; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a0, 1(sp)
-; RV32-BITS-UNKNOWN-NEXT:    li a0, 32
+; RV32-BITS-UNKNOWN-NEXT:    sb a1, 1(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    mv a1, sp
 ; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 ; RV32-BITS-UNKNOWN-NEXT:    vle8.v v8, (a1)
@@ -789,103 +805,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
 ; RV32-BITS-256-NEXT:    addi s0, sp, 64
 ; RV32-BITS-256-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-BITS-256-NEXT:    andi sp, sp, -32
-; RV32-BITS-256-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-256-NEXT:    andi a1, a0, 1
+; RV32-BITS-256-NEXT:    li a0, 32
+; RV32-BITS-256-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; RV32-BITS-256-NEXT:    vfirst.m a1, v0
+; RV32-BITS-256-NEXT:    seqz a1, a1
 ; RV32-BITS-256-NEXT:    sb a1, 31(sp)
-; RV32-BITS-256-NEXT:    srli a1, a0, 31
-; RV32-BITS-256-NEXT:    sb a1, 0(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 30
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 30(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 29
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 29(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 28
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 28(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 27
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 27(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 26
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 26(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 25
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 25(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 24
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 24(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 23
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 23(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 22
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 22(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 21
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 21(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 20
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 20(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 19
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 19(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 18
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 18(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 17
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 17(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 16
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 16(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 15
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 15(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 14
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 14(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 13
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 13(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 12
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 12(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 11
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 11(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 10
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 10(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 9
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 9(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 8
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 8(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 7
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 7(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 6
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 6(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 5
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 5(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 4
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 4(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 3
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 3(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 2
+; RV32-BITS-256-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-256-NEXT:    vmv.x.s a1, v0
+; RV32-BITS-256-NEXT:    srli a2, a1, 31
+; RV32-BITS-256-NEXT:    sb a2, 0(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 30
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 30(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 29
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 29(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 28
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 28(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 27
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 27(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 26
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 26(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 25
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 25(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 24
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 24(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 23
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 23(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 22
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 22(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 21
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 21(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 20
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 20(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 19
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 19(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 18
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 18(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 17
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 17(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 16
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 16(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 15
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 15(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 14
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 14(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 13
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 13(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 12
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 12(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 11
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 11(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 10
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 10(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 9
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 9(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 8
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 8(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 7
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 7(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 6
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 6(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 5
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 5(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 4
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 4(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 3
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 3(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 2
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 2(sp)
+; RV32-BITS-256-NEXT:    slli a1, a1, 1
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 2(sp)
-; RV32-BITS-256-NEXT:    slli a0, a0, 1
-; RV32-BITS-256-NEXT:    srli a0, a0, 31
-; RV32-BITS-256-NEXT:    sb a0, 1(sp)
-; RV32-BITS-256-NEXT:    li a0, 32
+; RV32-BITS-256-NEXT:    sb a1, 1(sp)
 ; RV32-BITS-256-NEXT:    mv a1, sp
 ; RV32-BITS-256-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 ; RV32-BITS-256-NEXT:    vle8.v v8, (a1)
@@ -908,103 +926,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
 ; RV32-BITS-512-NEXT:    addi s0, sp, 64
 ; RV32-BITS-512-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-BITS-512-NEXT:    andi sp, sp, -32
-; RV32-BITS-512-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-512-NEXT:    andi a1, a0, 1
+; RV32-BITS-512-NEXT:    li a0, 32
+; RV32-BITS-512-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; RV32-BITS-512-NEXT:    vfirst.m a1, v0
+; RV32-BITS-512-NEXT:    seqz a1, a1
 ; RV32-BITS-512-NEXT:    sb a1, 31(sp)
-; RV32-BITS-512-NEXT:    srli a1, a0, 31
-; RV32-BITS-512-NEXT:    sb a1, 0(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 30
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 30(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 29
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 29(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 28
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 28(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 27
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 27(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 26
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 26(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 25
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 25(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 24
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 24(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 23
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 23(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 22
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 22(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 21
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 21(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 20
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 20(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 19
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 19(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 18
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 18(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 17
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 17(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 16
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 16(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 15
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 15(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 14
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 14(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 13
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 13(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 12
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 12(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 11
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 11(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 10
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 10(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 9
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 9(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 8
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 8(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 7
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 7(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 6
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 6(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 5
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 5(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 4
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 4(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 3
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 3(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 2
+; RV32-BITS-512-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-512-NEXT:    vmv.x.s a1, v0
+; RV32-BITS-512-NEXT:    srli a2, a1, 31
+; RV32-BITS-512-NEXT:    sb a2, 0(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 30
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 30(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 29
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 29(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 28
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 28(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 27
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 27(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 26
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 26(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 25
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 25(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 24
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 24(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 23
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 23(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 22
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 22(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 21
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 21(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 20
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 20(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 19
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 19(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 18
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 18(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 17
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 17(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 16
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 16(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 15
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 15(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 14
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 14(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 13
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 13(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 12
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 12(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 11
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 11(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 10
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 10(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 9
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 9(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 8
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 8(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 7
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 7(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 6
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 6(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 5
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 5(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 4
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 4(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 3
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 3(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 2
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 2(sp)
+; RV32-BITS-512-NEXT:    slli a1, a1, 1
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 2(sp)
-; RV32-BITS-512-NEXT:    slli a0, a0, 1
-; RV32-BITS-512-NEXT:    srli a0, a0, 31
-; RV32-BITS-512-NEXT:    sb a0, 1(sp)
-; RV32-BITS-512-NEXT:    li a0, 32
+; RV32-BITS-512-NEXT:    sb a1, 1(sp)
 ; RV32-BITS-512-NEXT:    mv a1, sp
 ; RV32-BITS-512-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 ; RV32-BITS-512-NEXT:    vle8.v v8, (a1)
@@ -1027,103 +1047,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
 ; RV64-BITS-UNKNOWN-NEXT:    addi s0, sp, 64
 ; RV64-BITS-UNKNOWN-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-BITS-UNKNOWN-NEXT:    andi sp, sp, -32
-; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT:    andi a1, a0, 1
+; RV64-BITS-UNKNOWN-NEXT:    li a0, 32
+; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a1, v0
+; RV64-BITS-UNKNOWN-NEXT:    seqz a1, a1
 ; RV64-BITS-UNKNOWN-NEXT:    sb a1, 31(sp)
-; RV64-BITS-UNKNOWN-NEXT:    srliw a1, a0, 31
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 0(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 62
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 30(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 61
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 29(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 60
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 28(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 59
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 27(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 58
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 26(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 57
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 25(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 56
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 24(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 55
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 23(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 54
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 22(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 53
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 21(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 52
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 20(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 51
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 19(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 50
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 18(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 49
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 17(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 48
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 16(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 47
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 15(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 46
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 45
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 44
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 12(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 43
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 42
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 41
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 40
+; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v0
+; RV64-BITS-UNKNOWN-NEXT:    srliw a2, a1, 31
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 0(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 62
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 30(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 61
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 29(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 60
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 28(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 59
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 27(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 58
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 26(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 57
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 25(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 56
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 24(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 55
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 23(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 54
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 22(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 53
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 21(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 52
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 20(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 51
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 19(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 50
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 18(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 49
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 17(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 48
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 16(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 47
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 15(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 46
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 14(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 45
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 13(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 44
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 12(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 43
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 11(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 42
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 10(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 41
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 9(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 40
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 8(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 39
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 7(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 38
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 6(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 37
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 5(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 36
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 4(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 35
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 3(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 34
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 2(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a1, 33
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 8(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 39
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 7(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 38
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 6(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 37
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 5(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 36
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 4(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 35
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 3(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 34
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 2(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 33
-; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a0, 1(sp)
-; RV64-BITS-UNKNOWN-NEXT:    li a0, 32
+; RV64-BITS-UNKNOWN-NEXT:    sb a1, 1(sp)
 ; RV64-BITS-UNKNOWN-NEXT:    mv a1, sp
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vle8.v v8, (a1)
@@ -1146,103 +1168,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
 ; RV64-BITS-256-NEXT:    addi s0, sp, 64
 ; RV64-BITS-256-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-BITS-256-NEXT:    andi sp, sp, -32
-; RV64-BITS-256-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-256-NEXT:    andi a1, a0, 1
+; RV64-BITS-256-NEXT:    li a0, 32
+; RV64-BITS-256-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; RV64-BITS-256-NEXT:    vfirst.m a1, v0
+; RV64-BITS-256-NEXT:    seqz a1, a1
 ; RV64-BITS-256-NEXT:    sb a1, 31(sp)
-; RV64-BITS-256-NEXT:    srliw a1, a0, 31
-; RV64-BITS-256-NEXT:    sb a1, 0(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 62
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 30(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 61
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 29(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 60
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 28(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 59
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 27(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 58
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 26(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 57
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 25(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 56
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 24(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 55
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 23(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 54
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 22(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 53
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 21(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 52
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 20(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 51
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 19(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 50
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 18(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 49
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 17(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 48
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 16(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 47
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 15(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 46
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 14(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 45
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 13(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 44
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 12(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 43
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 11(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 42
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 10(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 41
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 9(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 40
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 8(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 39
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 7(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 38
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 6(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 37
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 5(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 36
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 4(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 35
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 3(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 34
+; RV64-BITS-256-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
+; RV64-BITS-256-NEXT:    vmv.x.s a1, v0
+; RV64-BITS-256-NEXT:    srliw a2, a1, 31
+; RV64-BITS-256-NEXT:    sb a2, 0(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 62
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 30(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 61
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 29(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 60
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 28(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 59
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 27(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 58
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 26(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 57
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 25(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 56
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 24(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 55
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 23(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 54
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 22(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 53
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 21(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 52
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 20(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 51
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 19(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 50
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 18(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 49
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 17(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 48
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 16(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 47
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 15(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 46
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 14(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 45
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 13(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 44
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 12(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 43
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 11(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 42
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 10(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 41
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 9(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 40
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 8(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 39
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 7(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 38
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 6(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 37
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 5(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 36
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 4(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 35
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 3(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 34
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 2(sp)
+; RV64-BITS-256-NEXT:    slli a1, a1, 33
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 2(sp)
-; RV64-BITS-256-NEXT:    slli a0, a0, 33
-; RV64-BITS-256-NEXT:    srli a0, a0, 63
-; RV64-BITS-256-NEXT:    sb a0, 1(sp)
-; RV64-BITS-256-NEXT:    li a0, 32
+; RV64-BITS-256-NEXT:    sb a1, 1(sp)
 ; RV64-BITS-256-NEXT:    mv a1, sp
 ; RV64-BITS-256-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 ; RV64-BITS-256-NEXT:    vle8.v v8, (a1)
@@ -1265,103 +1289,105 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
 ; RV64-BITS-512-NEXT:    addi s0, sp, 64
 ; RV64-BITS-512-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-BITS-512-NEXT:    andi sp, sp, -32
-; RV64-BITS-512-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-512-NEXT:    andi a1, a0, 1
+; RV64-BITS-512-NEXT:    li a0, 32
+; RV64-BITS-512-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; RV64-BITS-512-NEXT:    vfirst.m a1, v0
+; RV64-BITS-512-NEXT:    seqz a1, a1
 ; RV64-BITS-512-NEXT:    sb a1, 31(sp)
-; RV64-BITS-512-NEXT:    srliw a1, a0, 31
-; RV64-BITS-512-NEXT:    sb a1, 0(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 62
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 30(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 61
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 29(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 60
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 28(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 59
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 27(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 58
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 26(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 57
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 25(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 56
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 24(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 55
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 23(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 54
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 22(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 53
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 21(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 52
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 20(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 51
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 19(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 50
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 18(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 49
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 17(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 48
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 16(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 47
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 15(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 46
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 14(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 45
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 13(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 44
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 12(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 43
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 11(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 42
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 10(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 41
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 9(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 40
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 8(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 39
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 7(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 38
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 6(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 37
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 5(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 36
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 4(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 35
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 3(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 34
+; RV64-BITS-512-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
+; RV64-BITS-512-NEXT:    vmv.x.s a1, v0
+; RV64-BITS-512-NEXT:    srliw a2, a1, 31
+; RV64-BITS-512-NEXT:    sb a2, 0(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 62
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 30(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 61
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 29(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 60
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 28(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 59
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 27(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 58
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 26(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 57
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 25(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 56
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 24(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 55
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 23(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 54
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 22(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 53
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 21(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 52
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 20(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 51
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 19(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 50
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 18(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 49
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 17(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 48
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 16(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 47
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 15(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 46
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 14(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 45
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 13(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 44
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 12(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 43
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 11(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 42
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 10(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 41
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 9(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 40
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 8(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 39
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 7(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 38
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 6(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 37
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 5(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 36
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 4(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 35
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 3(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 34
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 2(sp)
+; RV64-BITS-512-NEXT:    slli a1, a1, 33
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 2(sp)
-; RV64-BITS-512-NEXT:    slli a0, a0, 33
-; RV64-BITS-512-NEXT:    srli a0, a0, 63
-; RV64-BITS-512-NEXT:    sb a0, 1(sp)
-; RV64-BITS-512-NEXT:    li a0, 32
+; RV64-BITS-512-NEXT:    sb a1, 1(sp)
 ; RV64-BITS-512-NEXT:    mv a1, sp
 ; RV64-BITS-512-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 ; RV64-BITS-512-NEXT:    vle8.v v8, (a1)
@@ -1388,199 +1414,201 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
 ; RV32-BITS-UNKNOWN-NEXT:    addi s0, sp, 128
 ; RV32-BITS-UNKNOWN-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-BITS-UNKNOWN-NEXT:    andi sp, sp, -64
-; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT:    andi a1, a0, 1
+; RV32-BITS-UNKNOWN-NEXT:    li a0, 64
+; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a1, v0
+; RV32-BITS-UNKNOWN-NEXT:    seqz a1, a1
 ; RV32-BITS-UNKNOWN-NEXT:    sb a1, 63(sp)
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a0, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 32(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 30
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 62(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 29
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 61(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 28
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 60(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 27
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 59(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 26
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 58(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 25
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 57(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 24
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 56(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 23
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 55(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 22
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 54(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 21
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 53(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 20
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 52(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 19
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 51(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 18
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 50(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 17
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 49(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 16
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 48(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 15
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 47(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 14
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 46(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 13
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 45(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 12
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 44(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 11
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 43(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 10
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 42(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 9
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 41(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 8
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 40(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 7
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 39(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 6
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 38(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 5
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 37(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 4
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 36(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 3
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 35(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 2
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 34(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a0, a0, 1
-; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a0, 33(sp)
+; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v0
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 32(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 30
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 62(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 29
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 61(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 28
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 60(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 27
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 59(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 26
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 58(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 25
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 57(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 24
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 56(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 23
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 55(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 22
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 54(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 21
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 53(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 20
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 52(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 19
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 51(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 18
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 50(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 17
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 49(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 16
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 48(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 15
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 47(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 14
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 46(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 13
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 45(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 12
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 44(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 11
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 43(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 10
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 42(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 9
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 41(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 8
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 40(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 7
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 39(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 6
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 38(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 5
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 37(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 4
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 36(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 3
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 35(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 2
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 34(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a1, 1
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a1, 33(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    vslidedown.vi v8, v0, 1
-; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v8
-; RV32-BITS-UNKNOWN-NEXT:    andi a1, a0, 1
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 31(sp)
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a0, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 0(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 30
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 30(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 29
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 29(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 28
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 28(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 27
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 27(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 26
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 26(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 25
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 25(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 24
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 24(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 23
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 23(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 22
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 22(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 21
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 21(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 20
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 20(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 19
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 19(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 18
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 18(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 17
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 17(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 16
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 16(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 15
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 15(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 14
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 13
+; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v8
+; RV32-BITS-UNKNOWN-NEXT:    andi a2, a1, 1
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 31(sp)
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 0(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 30
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 30(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 29
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 29(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 28
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 28(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 27
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 27(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 26
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 26(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 25
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 25(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 24
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 24(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 23
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 23(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 22
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 22(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 21
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 21(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 20
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 20(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 19
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 19(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 18
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 18(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 17
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 17(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 16
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 16(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 15
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 15(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 14
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 14(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 13
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 13(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 12
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 12(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 11
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 11(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 10
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 10(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 9
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 9(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 8
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 8(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 7
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 7(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 6
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 6(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 5
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 5(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 4
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 4(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 3
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 3(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 2
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
+; RV32-BITS-UNKNOWN-NEXT:    sb a2, 2(sp)
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a1, 1
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 12
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 12(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 11
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 10
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 9
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 8
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 8(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 7
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 7(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 6
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 6(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 5
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 5(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 4
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 4(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 3
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 3(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 2
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 2(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a0, a0, 1
-; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a0, 1(sp)
-; RV32-BITS-UNKNOWN-NEXT:    li a0, 64
+; RV32-BITS-UNKNOWN-NEXT:    sb a1, 1(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    mv a1, sp
 ; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; RV32-BITS-UNKNOWN-NEXT:    vle8.v v8, (a1)
@@ -1603,199 +1631,201 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
 ; RV32-BITS-256-NEXT:    addi s0, sp, 128
 ; RV32-BITS-256-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-BITS-256-NEXT:    andi sp, sp, -64
-; RV32-BITS-256-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-256-NEXT:    andi a1, a0, 1
+; RV32-BITS-256-NEXT:    li a0, 64
+; RV32-BITS-256-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-256-NEXT:    vfirst.m a1, v0
+; RV32-BITS-256-NEXT:    seqz a1, a1
 ; RV32-BITS-256-NEXT:    sb a1, 63(sp)
-; RV32-BITS-256-NEXT:    srli a1, a0, 31
-; RV32-BITS-256-NEXT:    sb a1, 32(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 30
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 62(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 29
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 61(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 28
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 60(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 27
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 59(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 26
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 58(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 25
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 57(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 24
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 56(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 23
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 55(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 22
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 54(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 21
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 53(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 20
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 52(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 19
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 51(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 18
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 50(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 17
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 49(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 16
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 48(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 15
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 47(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 14
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 46(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 13
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 45(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 12
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 44(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 11
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 43(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 10
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 42(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 9
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 41(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 8
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 40(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 7
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 39(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 6
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 38(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 5
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 37(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 4
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 36(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 3
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 35(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 2
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 34(sp)
-; RV32-BITS-256-NEXT:    slli a0, a0, 1
-; RV32-BITS-256-NEXT:    srli a0, a0, 31
-; RV32-BITS-256-NEXT:    sb a0, 33(sp)
+; RV32-BITS-256-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-BITS-256-NEXT:    vmv.x.s a1, v0
+; RV32-BITS-256-NEXT:    srli a2, a1, 31
+; RV32-BITS-256-NEXT:    sb a2, 32(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 30
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 62(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 29
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 61(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 28
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 60(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 27
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 59(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 26
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 58(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 25
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 57(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 24
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 56(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 23
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 55(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 22
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 54(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 21
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 53(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 20
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 52(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 19
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 51(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 18
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 50(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 17
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 49(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 16
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 48(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 15
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 47(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 14
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 46(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 13
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 45(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 12
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 44(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 11
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 43(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 10
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 42(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 9
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 41(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 8
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 40(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 7
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 39(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 6
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 38(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 5
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 37(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 4
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 36(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 3
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 35(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 2
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 34(sp)
+; RV32-BITS-256-NEXT:    slli a1, a1, 1
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    sb a1, 33(sp)
 ; RV32-BITS-256-NEXT:    vslidedown.vi v8, v0, 1
-; RV32-BITS-256-NEXT:    vmv.x.s a0, v8
-; RV32-BITS-256-NEXT:    andi a1, a0, 1
-; RV32-BITS-256-NEXT:    sb a1, 31(sp)
-; RV32-BITS-256-NEXT:    srli a1, a0, 31
-; RV32-BITS-256-NEXT:    sb a1, 0(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 30
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 30(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 29
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 29(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 28
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 28(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 27
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 27(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 26
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 26(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 25
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 25(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 24
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 24(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 23
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 23(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 22
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 22(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 21
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 21(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 20
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 20(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 19
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 19(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 18
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 18(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 17
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 17(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 16
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 16(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 15
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 15(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 14
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 14(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 13
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 13(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 12
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 12(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 11
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 11(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 10
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 10(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 9
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 9(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 8
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 8(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 7
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 7(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 6
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 6(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 5
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 5(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 4
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 4(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 3
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 3(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 2
+; RV32-BITS-256-NEXT:    vmv.x.s a1, v8
+; RV32-BITS-256-NEXT:    andi a2, a1, 1
+; RV32-BITS-256-NEXT:    sb a2, 31(sp)
+; RV32-BITS-256-NEXT:    srli a2, a1, 31
+; RV32-BITS-256-NEXT:    sb a2, 0(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 30
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 30(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 29
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 29(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 28
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 28(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 27
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 27(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 26
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 26(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 25
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 25(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 24
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 24(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 23
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 23(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 22
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 22(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 21
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 21(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 20
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 20(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 19
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 19(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 18
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 18(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 17
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 17(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 16
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 16(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 15
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 15(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 14
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 14(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 13
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 13(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 12
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 12(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 11
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 11(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 10
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 10(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 9
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 9(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 8
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 8(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 7
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 7(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 6
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 6(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 5
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 5(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 4
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 4(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 3
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 3(sp)
+; RV32-BITS-256-NEXT:    slli a2, a1, 2
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    sb a2, 2(sp)
+; RV32-BITS-256-NEXT:    slli a1, a1, 1
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 2(sp)
-; RV32-BITS-256-NEXT:    slli a0, a0, 1
-; RV32-BITS-256-NEXT:    srli a0, a0, 31
-; RV32-BITS-256-NEXT:    sb a0, 1(sp)
-; RV32-BITS-256-NEXT:    li a0, 64
+; RV32-BITS-256-NEXT:    sb a1, 1(sp)
 ; RV32-BITS-256-NEXT:    mv a1, sp
 ; RV32-BITS-256-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; RV32-BITS-256-NEXT:    vle8.v v8, (a1)
@@ -1818,199 +1848,201 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
 ; RV32-BITS-512-NEXT:    addi s0, sp, 128
 ; RV32-BITS-512-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-BITS-512-NEXT:    andi sp, sp, -64
-; RV32-BITS-512-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-512-NEXT:    andi a1, a0, 1
+; RV32-BITS-512-NEXT:    li a0, 64
+; RV32-BITS-512-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-512-NEXT:    vfirst.m a1, v0
+; RV32-BITS-512-NEXT:    seqz a1, a1
 ; RV32-BITS-512-NEXT:    sb a1, 63(sp)
-; RV32-BITS-512-NEXT:    srli a1, a0, 31
-; RV32-BITS-512-NEXT:    sb a1, 32(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 30
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 62(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 29
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 61(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 28
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 60(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 27
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 59(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 26
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 58(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 25
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 57(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 24
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 56(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 23
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 55(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 22
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 54(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 21
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 53(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 20
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 52(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 19
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 51(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 18
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 50(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 17
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 49(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 16
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 48(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 15
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 47(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 14
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 46(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 13
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 45(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 12
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 44(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 11
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 43(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 10
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 42(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 9
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 41(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 8
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 40(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 7
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 39(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 6
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 38(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 5
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 37(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 4
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 36(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 3
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 35(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 2
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 34(sp)
-; RV32-BITS-512-NEXT:    slli a0, a0, 1
-; RV32-BITS-512-NEXT:    srli a0, a0, 31
-; RV32-BITS-512-NEXT:    sb a0, 33(sp)
+; RV32-BITS-512-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-BITS-512-NEXT:    vmv.x.s a1, v0
+; RV32-BITS-512-NEXT:    srli a2, a1, 31
+; RV32-BITS-512-NEXT:    sb a2, 32(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 30
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 62(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 29
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 61(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 28
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 60(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 27
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 59(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 26
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 58(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 25
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 57(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 24
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 56(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 23
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 55(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 22
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 54(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 21
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 53(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 20
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 52(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 19
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 51(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 18
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 50(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 17
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 49(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 16
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 48(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 15
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 47(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 14
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 46(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 13
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 45(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 12
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 44(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 11
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 43(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 10
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 42(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 9
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 41(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 8
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 40(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 7
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 39(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 6
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 38(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 5
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 37(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 4
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 36(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 3
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 35(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 2
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 34(sp)
+; RV32-BITS-512-NEXT:    slli a1, a1, 1
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    sb a1, 33(sp)
 ; RV32-BITS-512-NEXT:    vslidedown.vi v8, v0, 1
-; RV32-BITS-512-NEXT:    vmv.x.s a0, v8
-; RV32-BITS-512-NEXT:    andi a1, a0, 1
-; RV32-BITS-512-NEXT:    sb a1, 31(sp)
-; RV32-BITS-512-NEXT:    srli a1, a0, 31
-; RV32-BITS-512-NEXT:    sb a1, 0(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 30
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 30(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 29
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 29(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 28
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 28(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 27
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 27(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 26
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 26(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 25
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 25(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 24
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 24(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 23
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 23(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 22
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 22(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 21
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 21(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 20
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 20(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 19
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 19(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 18
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 18(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 17
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 17(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 16
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 16(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 15
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 15(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 14
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 14(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 13
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 13(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 12
+; RV32-BITS-512-NEXT:    vmv.x.s a1, v8
+; RV32-BITS-512-NEXT:    andi a2, a1, 1
+; RV32-BITS-512-NEXT:    sb a2, 31(sp)
+; RV32-BITS-512-NEXT:    srli a2, a1, 31
+; RV32-BITS-512-NEXT:    sb a2, 0(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 30
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 30(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 29
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 29(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 28
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 28(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 27
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 27(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 26
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 26(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 25
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 25(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 24
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 24(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 23
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 23(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 22
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 22(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 21
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 21(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 20
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 20(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 19
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 19(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 18
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 18(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 17
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 17(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 16
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 16(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 15
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 15(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 14
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 14(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 13
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 13(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 12
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 12(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 11
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 11(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 10
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 10(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 9
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 9(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 8
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 8(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 7
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 7(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 6
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 6(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 5
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 5(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 4
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 4(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 3
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 3(sp)
+; RV32-BITS-512-NEXT:    slli a2, a1, 2
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    sb a2, 2(sp)
+; RV32-BITS-512-NEXT:    slli a1, a1, 1
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 12(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 11
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 11(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 10
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 10(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 9
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 9(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 8
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 8(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 7
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 7(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 6
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 6(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 5
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 5(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 4
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 4(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 3
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 3(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 2
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 2(sp)
-; RV32-BITS-512-NEXT:    slli a0, a0, 1
-; RV32-BITS-512-NEXT:    srli a0, a0, 31
-; RV32-BITS-512-NEXT:    sb a0, 1(sp)
-; RV32-BITS-512-NEXT:    li a0, 64
+; RV32-BITS-512-NEXT:    sb a1, 1(sp)
 ; RV32-BITS-512-NEXT:    mv a1, sp
 ; RV32-BITS-512-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; RV32-BITS-512-NEXT:    vle8.v v8, (a1)
@@ -2033,198 +2065,200 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
 ; RV64-BITS-UNKNOWN-NEXT:    addi s0, sp, 128
 ; RV64-BITS-UNKNOWN-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-BITS-UNKNOWN-NEXT:    andi sp, sp, -64
-; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT:    andi a1, a0, 1
+; RV64-BITS-UNKNOWN-NEXT:    li a0, 64
+; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a1, v0
+; RV64-BITS-UNKNOWN-NEXT:    seqz a1, a1
 ; RV64-BITS-UNKNOWN-NEXT:    sb a1, 63(sp)
-; RV64-BITS-UNKNOWN-NEXT:    srliw a1, a0, 31
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 32(sp)
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a0, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 0(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 62
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 62(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 61
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 61(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 60
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 60(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 59
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 59(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 58
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 58(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 57
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 57(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 56
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 56(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 55
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 55(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 54
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 54(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 53
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 53(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 52
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 52(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 51
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 51(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 50
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 50(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 49
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 49(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 48
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 48(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 47
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 47(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 46
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 46(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 45
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 45(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 44
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 44(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 43
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 43(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 42
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 42(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 41
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 41(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 40
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 40(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 39
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 39(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 38
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 38(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 37
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 37(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 36
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 36(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 35
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 35(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 34
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 34(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 33
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 33(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 31
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 31(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 30
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 30(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 29
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 29(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 28
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 28(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 27
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 27(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 26
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 26(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 25
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 25(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 24
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 24(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 23
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 23(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 22
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 22(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 21
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 21(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 20
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 20(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 19
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 19(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 18
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 18(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 17
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 17(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 16
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 16(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 15
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 15(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 14
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 13
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 12
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 12(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 11
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 10
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 9
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 8
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 8(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 7
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 7(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 6
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 6(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 5
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 5(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 4
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 4(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 3
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 3(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 2
+; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v0
+; RV64-BITS-UNKNOWN-NEXT:    srliw a2, a1, 31
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 32(sp)
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 0(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 62
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 62(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 61
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 61(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 60
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 60(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 59
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 59(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 58
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 58(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 57
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 57(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 56
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 56(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 55
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 55(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 54
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 54(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 53
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 53(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 52
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 52(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 51
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 51(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 50
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 50(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 49
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 49(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 48
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 48(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 47
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 47(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 46
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 46(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 45
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 45(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 44
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 44(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 43
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 43(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 42
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 42(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 41
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 41(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 40
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 40(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 39
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 39(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 38
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 38(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 37
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 37(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 36
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 36(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 35
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 35(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 34
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 34(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 33
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 33(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 31
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 31(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 30
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 30(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 29
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 29(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 28
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 28(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 27
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 27(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 26
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 26(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 25
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 25(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 24
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 24(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 23
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 23(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 22
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 22(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 21
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 21(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 20
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 20(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 19
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 19(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 18
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 18(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 17
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 17(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 16
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 16(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 15
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 15(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 14
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 14(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 13
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 13(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 12
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 12(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 11
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 11(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 10
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 10(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 9
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 9(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 8
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 8(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 7
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 7(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 6
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 6(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 5
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 5(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 4
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 4(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 3
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 3(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 2
+; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
+; RV64-BITS-UNKNOWN-NEXT:    sb a2, 2(sp)
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a1, 1
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 2(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a0, 1(sp)
-; RV64-BITS-UNKNOWN-NEXT:    li a0, 64
+; RV64-BITS-UNKNOWN-NEXT:    sb a1, 1(sp)
 ; RV64-BITS-UNKNOWN-NEXT:    mv a1, sp
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vle8.v v8, (a1)
@@ -2247,198 +2281,200 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
 ; RV64-BITS-256-NEXT:    addi s0, sp, 128
 ; RV64-BITS-256-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-BITS-256-NEXT:    andi sp, sp, -64
-; RV64-BITS-256-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-256-NEXT:    andi a1, a0, 1
+; RV64-BITS-256-NEXT:    li a0, 64
+; RV64-BITS-256-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV64-BITS-256-NEXT:    vfirst.m a1, v0
+; RV64-BITS-256-NEXT:    seqz a1, a1
 ; RV64-BITS-256-NEXT:    sb a1, 63(sp)
-; RV64-BITS-256-NEXT:    srliw a1, a0, 31
-; RV64-BITS-256-NEXT:    sb a1, 32(sp)
-; RV64-BITS-256-NEXT:    srli a1, a0, 63
-; RV64-BITS-256-NEXT:    sb a1, 0(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 62
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 62(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 61
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 61(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 60
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 60(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 59
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 59(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 58
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 58(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 57
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 57(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 56
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 56(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 55
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 55(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 54
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 54(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 53
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 53(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 52
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 52(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 51
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 51(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 50
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 50(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 49
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 49(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 48
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 48(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 47
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 47(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 46
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 46(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 45
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 45(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 44
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 44(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 43
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 43(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 42
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 42(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 41
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 41(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 40
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 40(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 39
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 39(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 38
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 38(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 37
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 37(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 36
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 36(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 35
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 35(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 34
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 34(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 33
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 33(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 31
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 31(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 30
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 30(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 29
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 29(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 28
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 28(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 27
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 27(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 26
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 26(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 25
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 25(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 24
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 24(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 23
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 23(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 22
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 22(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 21
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 21(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 20
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 20(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 19
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 19(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 18
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 18(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 17
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 17(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 16
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 16(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 15
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 15(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 14
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 14(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 13
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 13(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 12
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 12(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 11
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 11(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 10
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 10(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 9
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 9(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 8
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 8(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 7
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 7(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 6
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 6(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 5
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 5(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 4
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 4(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 3
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 3(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 2
+; RV64-BITS-256-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
+; RV64-BITS-256-NEXT:    vmv.x.s a1, v0
+; RV64-BITS-256-NEXT:    srliw a2, a1, 31
+; RV64-BITS-256-NEXT:    sb a2, 32(sp)
+; RV64-BITS-256-NEXT:    srli a2, a1, 63
+; RV64-BITS-256-NEXT:    sb a2, 0(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 62
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 62(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 61
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 61(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 60
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 60(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 59
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 59(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 58
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 58(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 57
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 57(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 56
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 56(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 55
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 55(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 54
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 54(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 53
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 53(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 52
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 52(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 51
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 51(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 50
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 50(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 49
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 49(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 48
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 48(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 47
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 47(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 46
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 46(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 45
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 45(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 44
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 44(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 43
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 43(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 42
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 42(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 41
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 41(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 40
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 40(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 39
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 39(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 38
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 38(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 37
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 37(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 36
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 36(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 35
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 35(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 34
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 34(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 33
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 33(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 31
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 31(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 30
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 30(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 29
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 29(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 28
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 28(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 27
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 27(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 26
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 26(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 25
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 25(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 24
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 24(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 23
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 23(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 22
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 22(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 21
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 21(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 20
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 20(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 19
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 19(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 18
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 18(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 17
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 17(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 16
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 16(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 15
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 15(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 14
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 14(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 13
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 13(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 12
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 12(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 11
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 11(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 10
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 10(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 9
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 9(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 8
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 8(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 7
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 7(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 6
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 6(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 5
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 5(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 4
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 4(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 3
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 3(sp)
+; RV64-BITS-256-NEXT:    slli a2, a1, 2
+; RV64-BITS-256-NEXT:    srli a2, a2, 63
+; RV64-BITS-256-NEXT:    sb a2, 2(sp)
+; RV64-BITS-256-NEXT:    slli a1, a1, 1
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 2(sp)
-; RV64-BITS-256-NEXT:    slli a0, a0, 1
-; RV64-BITS-256-NEXT:    srli a0, a0, 63
-; RV64-BITS-256-NEXT:    sb a0, 1(sp)
-; RV64-BITS-256-NEXT:    li a0, 64
+; RV64-BITS-256-NEXT:    sb a1, 1(sp)
 ; RV64-BITS-256-NEXT:    mv a1, sp
 ; RV64-BITS-256-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; RV64-BITS-256-NEXT:    vle8.v v8, (a1)
@@ -2461,198 +2497,200 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
 ; RV64-BITS-512-NEXT:    addi s0, sp, 128
 ; RV64-BITS-512-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-BITS-512-NEXT:    andi sp, sp, -64
-; RV64-BITS-512-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-512-NEXT:    andi a1, a0, 1
+; RV64-BITS-512-NEXT:    li a0, 64
+; RV64-BITS-512-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV64-BITS-512-NEXT:    vfirst.m a1, v0
+; RV64-BITS-512-NEXT:    seqz a1, a1
 ; RV64-BITS-512-NEXT:    sb a1, 63(sp)
-; RV64-BITS-512-NEXT:    srliw a1, a0, 31
-; RV64-BITS-512-NEXT:    sb a1, 32(sp)
-; RV64-BITS-512-NEXT:    srli a1, a0, 63
-; RV64-BITS-512-NEXT:    sb a1, 0(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 62
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 62(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 61
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 61(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 60
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 60(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 59
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 59(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 58
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 58(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 57
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 57(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 56
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 56(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 55
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 55(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 54
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 54(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 53
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 53(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 52
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 52(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 51
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 51(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 50
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 50(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 49
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 49(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 48
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 48(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 47
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 47(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 46
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 46(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 45
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 45(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 44
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 44(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 43
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 43(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 42
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 42(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 41
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 41(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 40
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 40(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 39
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 39(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 38
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 38(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 37
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 37(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 36
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 36(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 35
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 35(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 34
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 34(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 33
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 33(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 31
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 31(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 30
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 30(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 29
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 29(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 28
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 28(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 27
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 27(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 26
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 26(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 25
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 25(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 24
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 24(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 23
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 23(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 22
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 22(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 21
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 21(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 20
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 20(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 19
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 19(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 18
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 18(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 17
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 17(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 16
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 16(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 15
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 15(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 14
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 14(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 13
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 13(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 12
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 12(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 11
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 11(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 10
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 10(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 9
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 9(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 8
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 8(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 7
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 7(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 6
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 6(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 5
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 5(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 4
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 4(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 3
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 3(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 2
+; RV64-BITS-512-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
+; RV64-BITS-512-NEXT:    vmv.x.s a1, v0
+; RV64-BITS-512-NEXT:    srliw a2, a1, 31
+; RV64-BITS-512-NEXT:    sb a2, 32(sp)
+; RV64-BITS-512-NEXT:    srli a2, a1, 63
+; RV64-BITS-512-NEXT:    sb a2, 0(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 62
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 62(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 61
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 61(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 60
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 60(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 59
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 59(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 58
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 58(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 57
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 57(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 56
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 56(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 55
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 55(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 54
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 54(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 53
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 53(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 52
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 52(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 51
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 51(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 50
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 50(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 49
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 49(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 48
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 48(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 47
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 47(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 46
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 46(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 45
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 45(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 44
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 44(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 43
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 43(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 42
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 42(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 41
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 41(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 40
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 40(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 39
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 39(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 38
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 38(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 37
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 37(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 36
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 36(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 35
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 35(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 34
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 34(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 33
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 33(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 31
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 31(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 30
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 30(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 29
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 29(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 28
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 28(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 27
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 27(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 26
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 26(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 25
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 25(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 24
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 24(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 23
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 23(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 22
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 22(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 21
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 21(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 20
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 20(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 19
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 19(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 18
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 18(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 17
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 17(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 16
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 16(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 15
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 15(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 14
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 14(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 13
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 13(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 12
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 12(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 11
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 11(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 10
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 10(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 9
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 9(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 8
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 8(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 7
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 7(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 6
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 6(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 5
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 5(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 4
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 4(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 3
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 3(sp)
+; RV64-BITS-512-NEXT:    slli a2, a1, 2
+; RV64-BITS-512-NEXT:    srli a2, a2, 63
+; RV64-BITS-512-NEXT:    sb a2, 2(sp)
+; RV64-BITS-512-NEXT:    slli a1, a1, 1
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 2(sp)
-; RV64-BITS-512-NEXT:    slli a0, a0, 1
-; RV64-BITS-512-NEXT:    srli a0, a0, 63
-; RV64-BITS-512-NEXT:    sb a0, 1(sp)
-; RV64-BITS-512-NEXT:    li a0, 64
+; RV64-BITS-512-NEXT:    sb a1, 1(sp)
 ; RV64-BITS-512-NEXT:    mv a1, sp
 ; RV64-BITS-512-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; RV64-BITS-512-NEXT:    vle8.v v8, (a1)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index 2035a578c8023..ded6df1a77ef9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -453,10 +453,9 @@ define i1 @extractelt_v1i1_idx0(ptr %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <1 x i8>, ptr %x
   %b = icmp eq <1 x i8> %a, zeroinitializer
@@ -469,10 +468,9 @@ define i1 @extractelt_v2i1_idx0(ptr %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <2 x i8>, ptr %x
   %b = icmp eq <2 x i8> %a, zeroinitializer
@@ -485,10 +483,9 @@ define i1 @extractelt_v4i1_idx0(ptr %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <4 x i8>, ptr %x
   %b = icmp eq <4 x i8> %a, zeroinitializer
@@ -502,8 +499,8 @@ define i1 @extractelt_v8i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vmseq.vi v8, v8, 0
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vfirst.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <8 x i8>, ptr %x
   %b = icmp eq <8 x i8> %a, zeroinitializer
@@ -517,9 +514,8 @@ define i1 @extractelt_v16i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vmseq.vi v8, v8, 0
-; CHECK-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vfirst.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = icmp eq <16 x i8> %a, zeroinitializer
@@ -534,9 +530,8 @@ define i1 @extractelt_v32i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vmseq.vi v10, v8, 0
-; CHECK-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v10
-; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vfirst.m a0, v10
+; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = icmp eq <32 x i8> %a, zeroinitializer
@@ -545,49 +540,15 @@ define i1 @extractelt_v32i1_idx0(ptr %x) nounwind {
 }
 
 define i1 @extractelt_v64i1_idx0(ptr %x) nounwind {
-; RV32-LABEL: extractelt_v64i1_idx0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 64
-; RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v12, v8, 0
-; RV32-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    andi a0, a0, 1
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: extractelt_v64i1_idx0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 64
-; RV64-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v12, v8, 0
-; RV64-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    andi a0, a0, 1
-; RV64-NEXT:    ret
-;
-; RV32ZBS-LABEL: extractelt_v64i1_idx0:
-; RV32ZBS:       # %bb.0:
-; RV32ZBS-NEXT:    li a1, 64
-; RV32ZBS-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v12, v8, 0
-; RV32ZBS-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV32ZBS-NEXT:    vmv.x.s a0, v12
-; RV32ZBS-NEXT:    andi a0, a0, 1
-; RV32ZBS-NEXT:    ret
-;
-; RV64ZBS-LABEL: extractelt_v64i1_idx0:
-; RV64ZBS:       # %bb.0:
-; RV64ZBS-NEXT:    li a1, 64
-; RV64ZBS-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v12, v8, 0
-; RV64ZBS-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64ZBS-NEXT:    vmv.x.s a0, v12
-; RV64ZBS-NEXT:    andi a0, a0, 1
-; RV64ZBS-NEXT:    ret
+; CHECK-LABEL: extractelt_v64i1_idx0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmseq.vi v12, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v12
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = icmp eq <64 x i8> %a, zeroinitializer
   %c = extractelement <64 x i1> %b, i64 0
@@ -595,49 +556,15 @@ define i1 @extractelt_v64i1_idx0(ptr %x) nounwind {
 }
 
 define i1 @extractelt_v128i1_idx0(ptr %x) nounwind {
-; RV32-LABEL: extractelt_v128i1_idx0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 128
-; RV32-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v16, v8, 0
-; RV32-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v16
-; RV32-NEXT:    andi a0, a0, 1
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: extractelt_v128i1_idx0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 128
-; RV64-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v16, v8, 0
-; RV64-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v16
-; RV64-NEXT:    andi a0, a0, 1
-; RV64-NEXT:    ret
-;
-; RV32ZBS-LABEL: extractelt_v128i1_idx0:
-; RV32ZBS:       # %bb.0:
-; RV32ZBS-NEXT:    li a1, 128
-; RV32ZBS-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v16, v8, 0
-; RV32ZBS-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; RV32ZBS-NEXT:    vmv.x.s a0, v16
-; RV32ZBS-NEXT:    andi a0, a0, 1
-; RV32ZBS-NEXT:    ret
-;
-; RV64ZBS-LABEL: extractelt_v128i1_idx0:
-; RV64ZBS:       # %bb.0:
-; RV64ZBS-NEXT:    li a1, 128
-; RV64ZBS-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v16, v8, 0
-; RV64ZBS-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64ZBS-NEXT:    vmv.x.s a0, v16
-; RV64ZBS-NEXT:    andi a0, a0, 1
-; RV64ZBS-NEXT:    ret
+; CHECK-LABEL: extractelt_v128i1_idx0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmseq.vi v16, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v16
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = icmp eq <128 x i8> %a, zeroinitializer
   %c = extractelement <128 x i1> %b, i64 0
@@ -645,49 +572,15 @@ define i1 @extractelt_v128i1_idx0(ptr %x) nounwind {
 }
 
 define i1 @extractelt_v256i1_idx0(ptr %x) nounwind {
-; RV32-LABEL: extractelt_v256i1_idx0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 128
-; RV32-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v16, v8, 0
-; RV32-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v16
-; RV32-NEXT:    andi a0, a0, 1
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: extractelt_v256i1_idx0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 128
-; RV64-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v16, v8, 0
-; RV64-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v16
-; RV64-NEXT:    andi a0, a0, 1
-; RV64-NEXT:    ret
-;
-; RV32ZBS-LABEL: extractelt_v256i1_idx0:
-; RV32ZBS:       # %bb.0:
-; RV32ZBS-NEXT:    li a1, 128
-; RV32ZBS-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v16, v8, 0
-; RV32ZBS-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; RV32ZBS-NEXT:    vmv.x.s a0, v16
-; RV32ZBS-NEXT:    andi a0, a0, 1
-; RV32ZBS-NEXT:    ret
-;
-; RV64ZBS-LABEL: extractelt_v256i1_idx0:
-; RV64ZBS:       # %bb.0:
-; RV64ZBS-NEXT:    li a1, 128
-; RV64ZBS-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v16, v8, 0
-; RV64ZBS-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64ZBS-NEXT:    vmv.x.s a0, v16
-; RV64ZBS-NEXT:    andi a0, a0, 1
-; RV64ZBS-NEXT:    ret
+; CHECK-LABEL: extractelt_v256i1_idx0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmseq.vi v16, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v16
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
   %a = load <256 x i8>, ptr %x
   %b = icmp eq <256 x i8> %a, zeroinitializer
   %c = extractelement <256 x i1> %b, i64 0

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index df94ead917a73..11b059a052b10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -35,11 +35,8 @@ define <1 x i8> @mgather_v1i8(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i8> %passthru)
 ; RV64ZVE32F-LABEL: mgather_v1i8:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB0_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB0_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vlse8.v v8, (a0), zero
@@ -875,11 +872,8 @@ define <1 x i16> @mgather_v1i16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i16> %passthr
 ; RV64ZVE32F-LABEL: mgather_v1i16:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB13_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB13_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vlse16.v v8, (a0), zero
@@ -2081,11 +2075,8 @@ define <1 x i32> @mgather_v1i32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i32> %passthr
 ; RV64ZVE32F-LABEL: mgather_v1i32:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB27_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB27_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vlse32.v v8, (a0), zero
@@ -3631,11 +3622,8 @@ define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthr
 ; RV32ZVE32F-LABEL: mgather_v1i64:
 ; RV32ZVE32F:       # %bb.0:
 ; RV32ZVE32F-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV32ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32ZVE32F-NEXT:    vmv.x.s a2, v9
-; RV32ZVE32F-NEXT:    andi a2, a2, 1
-; RV32ZVE32F-NEXT:    beqz a2, .LBB42_2
+; RV32ZVE32F-NEXT:    vfirst.m a2, v0
+; RV32ZVE32F-NEXT:    bnez a2, .LBB42_2
 ; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
@@ -3647,11 +3635,8 @@ define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthr
 ; RV64ZVE32F-LABEL: mgather_v1i64:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v8, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v8, v8, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
-; RV64ZVE32F-NEXT:    andi a2, a2, 1
-; RV64ZVE32F-NEXT:    beqz a2, .LBB42_2
+; RV64ZVE32F-NEXT:    vfirst.m a2, v0
+; RV64ZVE32F-NEXT:    bnez a2, .LBB42_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    ld a1, 0(a0)
 ; RV64ZVE32F-NEXT:  .LBB42_2: # %else
@@ -7164,11 +7149,8 @@ define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passt
 ; RV64ZVE32F-LABEL: mgather_v1f16:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB58_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB58_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vlse16.v v8, (a0), zero
@@ -8134,11 +8116,8 @@ define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %pas
 ; RV64ZVE32F-LABEL: mgather_v1f32:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB68_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB68_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vlse32.v v8, (a0), zero
@@ -9558,11 +9537,8 @@ define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %p
 ; RV32ZVE32F-LABEL: mgather_v1f64:
 ; RV32ZVE32F:       # %bb.0:
 ; RV32ZVE32F-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV32ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32ZVE32F-NEXT:    vmv.x.s a0, v9
-; RV32ZVE32F-NEXT:    andi a0, a0, 1
-; RV32ZVE32F-NEXT:    beqz a0, .LBB81_2
+; RV32ZVE32F-NEXT:    vfirst.m a0, v0
+; RV32ZVE32F-NEXT:    bnez a0, .LBB81_2
 ; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
@@ -9573,11 +9549,8 @@ define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %p
 ; RV64ZVE32F-LABEL: mgather_v1f64:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v8, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v8, v8, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB81_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB81_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    fld fa0, 0(a0)
 ; RV64ZVE32F-NEXT:  .LBB81_2: # %else

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 494f556aadcd7..aecbd1fbc5f15 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -32,11 +32,8 @@ define void @mscatter_v1i8(<1 x i8> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
 ; RV64ZVE32F-LABEL: mscatter_v1i8:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB0_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB0_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
@@ -657,11 +654,8 @@ define void @mscatter_v1i16(<1 x i16> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
 ; RV64ZVE32F-LABEL: mscatter_v1i16:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB10_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB10_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
@@ -1637,11 +1631,8 @@ define void @mscatter_v1i32(<1 x i32> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
 ; RV64ZVE32F-LABEL: mscatter_v1i32:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB22_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB22_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vse32.v v8, (a0)
@@ -2942,11 +2933,8 @@ define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
 ; RV32ZVE32F-LABEL: mscatter_v1i64:
 ; RV32ZVE32F:       # %bb.0:
 ; RV32ZVE32F-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV32ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32ZVE32F-NEXT:    vmv.x.s a2, v9
-; RV32ZVE32F-NEXT:    andi a2, a2, 1
-; RV32ZVE32F-NEXT:    beqz a2, .LBB36_2
+; RV32ZVE32F-NEXT:    vfirst.m a2, v0
+; RV32ZVE32F-NEXT:    bnez a2, .LBB36_2
 ; RV32ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
@@ -2958,11 +2946,8 @@ define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
 ; RV64ZVE32F-LABEL: mscatter_v1i64:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v8, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v8, v8, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
-; RV64ZVE32F-NEXT:    andi a2, a2, 1
-; RV64ZVE32F-NEXT:    beqz a2, .LBB36_2
+; RV64ZVE32F-NEXT:    vfirst.m a2, v0
+; RV64ZVE32F-NEXT:    bnez a2, .LBB36_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT:    sd a0, 0(a1)
 ; RV64ZVE32F-NEXT:  .LBB36_2: # %else
@@ -6089,11 +6074,8 @@ define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
 ; RV64ZVE32F-LABEL: mscatter_v1f16:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB52_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB52_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
@@ -6942,11 +6924,8 @@ define void @mscatter_v1f32(<1 x float> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
 ; RV64ZVE32F-LABEL: mscatter_v1f32:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB62_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB62_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vse32.v v8, (a0)
@@ -8191,11 +8170,8 @@ define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
 ; RV32ZVE32F-LABEL: mscatter_v1f64:
 ; RV32ZVE32F:       # %bb.0:
 ; RV32ZVE32F-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT:    vmv.v.i v9, 0
-; RV32ZVE32F-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32ZVE32F-NEXT:    vmv.x.s a0, v9
-; RV32ZVE32F-NEXT:    andi a0, a0, 1
-; RV32ZVE32F-NEXT:    beqz a0, .LBB75_2
+; RV32ZVE32F-NEXT:    vfirst.m a0, v0
+; RV32ZVE32F-NEXT:    bnez a0, .LBB75_2
 ; RV32ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
@@ -8206,11 +8182,8 @@ define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
 ; RV64ZVE32F-LABEL: mscatter_v1f64:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.i v8, 0
-; RV64ZVE32F-NEXT:    vmerge.vim v8, v8, 1, v0
-; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
-; RV64ZVE32F-NEXT:    andi a1, a1, 1
-; RV64ZVE32F-NEXT:    beqz a1, .LBB75_2
+; RV64ZVE32F-NEXT:    vfirst.m a1, v0
+; RV64ZVE32F-NEXT:    bnez a1, .LBB75_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
 ; RV64ZVE32F-NEXT:  .LBB75_2: # %else

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
index 28e4c74e1c324..ef0f607de26e2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
@@ -7,45 +7,13 @@
 declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>)
 
 define signext i1 @vreduce_or_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_or_v1i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX1-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_or_v1i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX1-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_or_v1i1:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX8-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_or_v1i1:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX8-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX8-RV64-NEXT:    ret
+; CHECK-LABEL: vreduce_or_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %v)
   ret i1 %red
 }
@@ -53,45 +21,13 @@ define signext i1 @vreduce_or_v1i1(<1 x i1> %v) {
 declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>)
 
 define signext i1 @vreduce_xor_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_xor_v1i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX1-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_xor_v1i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX1-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_xor_v1i1:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX8-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_xor_v1i1:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX8-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX8-RV64-NEXT:    ret
+; CHECK-LABEL: vreduce_xor_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %v)
   ret i1 %red
 }
@@ -99,45 +35,13 @@ define signext i1 @vreduce_xor_v1i1(<1 x i1> %v) {
 declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>)
 
 define signext i1 @vreduce_and_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_and_v1i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX1-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_and_v1i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX1-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_and_v1i1:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX8-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_and_v1i1:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX8-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX8-RV64-NEXT:    ret
+; CHECK-LABEL: vreduce_and_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
   ret i1 %red
 }
@@ -145,45 +49,13 @@ define signext i1 @vreduce_and_v1i1(<1 x i1> %v) {
 declare i1 @llvm.vector.reduce.umax.v1i1(<1 x i1>)
 
 define signext i1 @vreduce_umax_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_umax_v1i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX1-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_umax_v1i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX1-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_umax_v1i1:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX8-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_umax_v1i1:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX8-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX8-RV64-NEXT:    ret
+; CHECK-LABEL: vreduce_umax_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %v)
   ret i1 %red
 }
@@ -191,45 +63,13 @@ define signext i1 @vreduce_umax_v1i1(<1 x i1> %v) {
 declare i1 @llvm.vector.reduce.smax.v1i1(<1 x i1>)
 
 define signext i1 @vreduce_smax_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_smax_v1i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX1-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_smax_v1i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX1-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_smax_v1i1:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX8-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_smax_v1i1:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX8-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX8-RV64-NEXT:    ret
+; CHECK-LABEL: vreduce_smax_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> %v)
   ret i1 %red
 }
@@ -237,45 +77,13 @@ define signext i1 @vreduce_smax_v1i1(<1 x i1> %v) {
 declare i1 @llvm.vector.reduce.umin.v1i1(<1 x i1>)
 
 define signext i1 @vreduce_umin_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_umin_v1i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX1-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_umin_v1i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX1-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_umin_v1i1:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX8-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_umin_v1i1:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX8-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX8-RV64-NEXT:    ret
+; CHECK-LABEL: vreduce_umin_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> %v)
   ret i1 %red
 }
@@ -283,45 +91,13 @@ define signext i1 @vreduce_umin_v1i1(<1 x i1> %v) {
 declare i1 @llvm.vector.reduce.smin.v1i1(<1 x i1>)
 
 define signext i1 @vreduce_smin_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_smin_v1i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX1-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_smin_v1i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX1-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_smin_v1i1:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX8-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_smin_v1i1:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX8-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX8-RV64-NEXT:    ret
+; CHECK-LABEL: vreduce_smin_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> %v)
   ret i1 %red
 }
@@ -1223,45 +999,13 @@ define signext i1 @vreduce_smin_v64i1(<64 x i1> %v) {
 declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1>)
 
 define signext i1 @vreduce_add_v1i1(<1 x i1> %v) {
-; LMULMAX1-RV32-LABEL: vreduce_add_v1i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX1-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: vreduce_add_v1i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX1-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX1-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: vreduce_add_v1i1:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV32-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV32-NEXT:    slli a0, a0, 31
-; LMULMAX8-RV32-NEXT:    srai a0, a0, 31
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: vreduce_add_v1i1:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; LMULMAX8-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8-RV64-NEXT:    vmv.x.s a0, v8
-; LMULMAX8-RV64-NEXT:    slli a0, a0, 63
-; LMULMAX8-RV64-NEXT:    srai a0, a0, 63
-; LMULMAX8-RV64-NEXT:    ret
+; CHECK-LABEL: vreduce_add_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %v)
   ret i1 %red
 }


        


More information about the llvm-commits mailing list