[llvm] 35d218e - [RISCV] Use vslide1down idiom for generic build_vector

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Mon May 1 19:08:14 PDT 2023


Author: Philip Reames
Date: 2023-05-01T19:04:35-07:00
New Revision: 35d218e92740fb49ad5e2be4c700aa38c1133809

URL: https://github.com/llvm/llvm-project/commit/35d218e92740fb49ad5e2be4c700aa38c1133809
DIFF: https://github.com/llvm/llvm-project/commit/35d218e92740fb49ad5e2be4c700aa38c1133809.diff

LOG: [RISCV] Use vslide1down idiom for generic build_vector

We had previously been going through the stack.

A couple of small notes:
    We have the vslide1down idiom in a few other places. As a follow-up patch, I plan to try to common up the code a bit.
    The VF=2 case is still going through the splat + insert path. Picking the optimal sequence for that case seems to be a bit fiddly (due to constant materialization costs), so I restricted this to cases which would have previously hit the stack.
    I'm only handling integer vectors for the moment, mostly because the vfslide1down ISD nodes are not in place yet. Adding them will be an obvious follow-up.
    One of the test diffs does expose a missing combine - a build_vector whose prefix comes from a vector extract sequence. The code after this change is arguably worse (due to domain crossing vs. a stack store), but I think this is a narrow enough case to be non-blocking for now. Let me know if you disagree.

Differential Revision: https://reviews.llvm.org/D149263
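
To illustrate the idiom, here is a rough sketch (not taken from the commit's tests) of the kind of build_vector this change now handles and approximately how it lowers; the exact vsetvli and register choices below are assumptions:

    define <4 x i32> @buildvec_v4i32(i32 %a, i32 %b, i32 %c, i32 %d) {
      %v0 = insertelement <4 x i32> poison, i32 %a, i32 0
      %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
      %v2 = insertelement <4 x i32> %v1, i32 %c, i32 2
      %v3 = insertelement <4 x i32> %v2, i32 %d, i32 3
      ret <4 x i32> %v3
    }

    ; Before: the elements were stored to a stack slot and reloaded with a vector load.
    ; After (approximately):
    ;   vsetivli zero, 4, e32, m1, ta, ma
    ;   vslide1down.vx v8, v8, a0
    ;   vslide1down.vx v8, v8, a1
    ;   vslide1down.vx v8, v8, a2
    ;   vslide1down.vx v8, v8, a3
    ;   ret

Each vslide1down.vx shifts the accumulated vector down by one element and inserts the scalar operand into the last lane, so feeding the build_vector operands in source order yields <%a, %b, %c, %d> without touching memory.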

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
    llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
    llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
    llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ff081a7eead9..d8ebe5ea9355 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3186,7 +3186,24 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     return Vec;
   }
 
-  return SDValue();
+  // For constant vectors, use generic constant pool lowering.  Otherwise,
+  // we'd have to materialize constants in GPRs just to move them into the
+  // vector.
+  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()))
+    return SDValue();
+
+  // We can use a series of vslide1down instructions to move values in GPRs
+  // into the appropriate place in the result vector.  We use slide1down
+  // to avoid the register group overlap constraint of vslide1up.
+  if (VT.isFloatingPoint())
+    // TODO: Use vfslide1down.
+    return SDValue();
+
+  SDValue Vec = DAG.getUNDEF(ContainerVT);
+  for (const SDValue &V : Op->ops())
+    Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, ContainerVT,
+                      DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL);
+  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
 }
 
 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
index 6ea49d5a1314..212b2b840ce6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
@@ -34,29 +34,23 @@ define <2 x i1> @reverse_v2i1(<2 x i1> %a) {
 define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
 ; CHECK-LABEL: reverse_v4i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vslidedown.vi v9, v8, 1
-; CHECK-NEXT:    addi a0, sp, 14
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT:    vse8.v v9, (a0)
-; CHECK-NEXT:    vslidedown.vi v9, v8, 2
-; CHECK-NEXT:    addi a0, sp, 13
-; CHECK-NEXT:    vse8.v v9, (a0)
-; CHECK-NEXT:    vslidedown.vi v8, v8, 3
-; CHECK-NEXT:    addi a0, sp, 12
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vfirst.m a1, v0
-; CHECK-NEXT:    seqz a1, a1
-; CHECK-NEXT:    sb a1, 15(sp)
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vslidedown.vi v9, v8, 3
+; CHECK-NEXT:    vmv.x.s a0, v9
+; CHECK-NEXT:    vslide1down.vx v9, v8, a0
+; CHECK-NEXT:    vslidedown.vi v10, v8, 2
+; CHECK-NEXT:    vmv.x.s a0, v10
+; CHECK-NEXT:    vslide1down.vx v9, v9, a0
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vslide1down.vx v8, v9, a0
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %res = call <4 x i1> @llvm.experimental.vector.reverse.v4i1(<4 x i1> %a)
   ret <4 x i1> %res
@@ -65,224 +59,200 @@ define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
 define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 ; RV32-BITS-UNKNOWN-LABEL: reverse_v8i1:
 ; RV32-BITS-UNKNOWN:       # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT:    addi sp, sp, -16
-; RV32-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 16
-; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
-; RV32-BITS-UNKNOWN-NEXT:    seqz a0, a0
-; RV32-BITS-UNKNOWN-NEXT:    sb a0, 15(sp)
+; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
 ; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 30
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 24
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 29
+; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 25
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 28
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 26
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 12(sp)
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 27
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 26
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 28
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 25
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 29
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a0, a0, 24
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a0, 30
 ; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a0, 8(sp)
-; RV32-BITS-UNKNOWN-NEXT:    addi a0, sp, 8
-; RV32-BITS-UNKNOWN-NEXT:    vle8.v v8, (a0)
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-UNKNOWN-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-UNKNOWN-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-UNKNOWN-NEXT:    addi sp, sp, 16
 ; RV32-BITS-UNKNOWN-NEXT:    ret
 ;
 ; RV32-BITS-256-LABEL: reverse_v8i1:
 ; RV32-BITS-256:       # %bb.0:
-; RV32-BITS-256-NEXT:    addi sp, sp, -16
-; RV32-BITS-256-NEXT:    .cfi_def_cfa_offset 16
-; RV32-BITS-256-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-BITS-256-NEXT:    vfirst.m a0, v0
-; RV32-BITS-256-NEXT:    seqz a0, a0
-; RV32-BITS-256-NEXT:    sb a0, 15(sp)
+; RV32-BITS-256-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
 ; RV32-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-256-NEXT:    slli a1, a0, 30
+; RV32-BITS-256-NEXT:    slli a1, a0, 24
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 14(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 29
+; RV32-BITS-256-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 25
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 13(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 28
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 26
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 12(sp)
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-BITS-256-NEXT:    slli a1, a0, 27
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 11(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 26
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 28
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 10(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 25
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 29
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 9(sp)
-; RV32-BITS-256-NEXT:    slli a0, a0, 24
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a0, a0, 30
 ; RV32-BITS-256-NEXT:    srli a0, a0, 31
-; RV32-BITS-256-NEXT:    sb a0, 8(sp)
-; RV32-BITS-256-NEXT:    addi a0, sp, 8
-; RV32-BITS-256-NEXT:    vle8.v v8, (a0)
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    vfirst.m a0, v0
+; RV32-BITS-256-NEXT:    seqz a0, a0
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-256-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-256-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-256-NEXT:    addi sp, sp, 16
 ; RV32-BITS-256-NEXT:    ret
 ;
 ; RV32-BITS-512-LABEL: reverse_v8i1:
 ; RV32-BITS-512:       # %bb.0:
-; RV32-BITS-512-NEXT:    addi sp, sp, -16
-; RV32-BITS-512-NEXT:    .cfi_def_cfa_offset 16
-; RV32-BITS-512-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-BITS-512-NEXT:    vfirst.m a0, v0
-; RV32-BITS-512-NEXT:    seqz a0, a0
-; RV32-BITS-512-NEXT:    sb a0, 15(sp)
+; RV32-BITS-512-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
 ; RV32-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-512-NEXT:    slli a1, a0, 30
+; RV32-BITS-512-NEXT:    slli a1, a0, 24
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 14(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 29
+; RV32-BITS-512-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 25
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 13(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 28
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 26
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 12(sp)
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-BITS-512-NEXT:    slli a1, a0, 27
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 11(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 26
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 28
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 10(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 25
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 29
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 9(sp)
-; RV32-BITS-512-NEXT:    slli a0, a0, 24
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a0, a0, 30
 ; RV32-BITS-512-NEXT:    srli a0, a0, 31
-; RV32-BITS-512-NEXT:    sb a0, 8(sp)
-; RV32-BITS-512-NEXT:    addi a0, sp, 8
-; RV32-BITS-512-NEXT:    vle8.v v8, (a0)
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    vfirst.m a0, v0
+; RV32-BITS-512-NEXT:    seqz a0, a0
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-512-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-512-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-512-NEXT:    addi sp, sp, 16
 ; RV32-BITS-512-NEXT:    ret
 ;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v8i1:
 ; RV64-BITS-UNKNOWN:       # %bb.0:
-; RV64-BITS-UNKNOWN-NEXT:    addi sp, sp, -16
-; RV64-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 16
-; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
-; RV64-BITS-UNKNOWN-NEXT:    seqz a0, a0
-; RV64-BITS-UNKNOWN-NEXT:    sb a0, 15(sp)
+; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 62
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 56
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 61
+; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 57
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 60
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 58
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 12(sp)
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 59
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 58
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 60
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 57
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 61
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 56
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 62
 ; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a0, 8(sp)
-; RV64-BITS-UNKNOWN-NEXT:    addi a0, sp, 8
-; RV64-BITS-UNKNOWN-NEXT:    vle8.v v8, (a0)
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-UNKNOWN-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-UNKNOWN-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-UNKNOWN-NEXT:    addi sp, sp, 16
 ; RV64-BITS-UNKNOWN-NEXT:    ret
 ;
 ; RV64-BITS-256-LABEL: reverse_v8i1:
 ; RV64-BITS-256:       # %bb.0:
-; RV64-BITS-256-NEXT:    addi sp, sp, -16
-; RV64-BITS-256-NEXT:    .cfi_def_cfa_offset 16
-; RV64-BITS-256-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-BITS-256-NEXT:    vfirst.m a0, v0
-; RV64-BITS-256-NEXT:    seqz a0, a0
-; RV64-BITS-256-NEXT:    sb a0, 15(sp)
+; RV64-BITS-256-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
 ; RV64-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-256-NEXT:    slli a1, a0, 62
+; RV64-BITS-256-NEXT:    slli a1, a0, 56
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 14(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 61
+; RV64-BITS-256-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 57
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 13(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 60
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 58
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 12(sp)
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-BITS-256-NEXT:    slli a1, a0, 59
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 11(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 58
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 60
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 10(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 57
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 61
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 9(sp)
-; RV64-BITS-256-NEXT:    slli a0, a0, 56
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a0, a0, 62
 ; RV64-BITS-256-NEXT:    srli a0, a0, 63
-; RV64-BITS-256-NEXT:    sb a0, 8(sp)
-; RV64-BITS-256-NEXT:    addi a0, sp, 8
-; RV64-BITS-256-NEXT:    vle8.v v8, (a0)
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-256-NEXT:    vfirst.m a0, v0
+; RV64-BITS-256-NEXT:    seqz a0, a0
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-256-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-256-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-256-NEXT:    addi sp, sp, 16
 ; RV64-BITS-256-NEXT:    ret
 ;
 ; RV64-BITS-512-LABEL: reverse_v8i1:
 ; RV64-BITS-512:       # %bb.0:
-; RV64-BITS-512-NEXT:    addi sp, sp, -16
-; RV64-BITS-512-NEXT:    .cfi_def_cfa_offset 16
-; RV64-BITS-512-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-BITS-512-NEXT:    vfirst.m a0, v0
-; RV64-BITS-512-NEXT:    seqz a0, a0
-; RV64-BITS-512-NEXT:    sb a0, 15(sp)
+; RV64-BITS-512-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
 ; RV64-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-512-NEXT:    slli a1, a0, 62
+; RV64-BITS-512-NEXT:    slli a1, a0, 56
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 14(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 61
+; RV64-BITS-512-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 57
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 13(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 60
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 58
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 12(sp)
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-BITS-512-NEXT:    slli a1, a0, 59
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 11(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 58
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 60
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 10(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 57
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 61
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 9(sp)
-; RV64-BITS-512-NEXT:    slli a0, a0, 56
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a0, a0, 62
 ; RV64-BITS-512-NEXT:    srli a0, a0, 63
-; RV64-BITS-512-NEXT:    sb a0, 8(sp)
-; RV64-BITS-512-NEXT:    addi a0, sp, 8
-; RV64-BITS-512-NEXT:    vle8.v v8, (a0)
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-512-NEXT:    vfirst.m a0, v0
+; RV64-BITS-512-NEXT:    seqz a0, a0
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-512-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-512-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-512-NEXT:    addi sp, sp, 16
 ; RV64-BITS-512-NEXT:    ret
   %res = call <8 x i1> @llvm.experimental.vector.reverse.v8i1(<8 x i1> %a)
   ret <8 x i1> %res
@@ -291,380 +261,344 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
 define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
 ; RV32-BITS-UNKNOWN-LABEL: reverse_v16i1:
 ; RV32-BITS-UNKNOWN:       # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT:    addi sp, sp, -16
-; RV32-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 16
-; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
-; RV32-BITS-UNKNOWN-NEXT:    seqz a0, a0
-; RV32-BITS-UNKNOWN-NEXT:    sb a0, 15(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 30
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 16
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 29
+; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 17
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 28
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 18
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 12(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 27
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 19
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 26
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 20
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 25
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 21
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 24
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 22
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 8(sp)
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 23
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 7(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 22
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 24
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 6(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 21
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 25
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 5(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 20
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 26
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 4(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 19
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 27
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 3(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 18
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 28
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 2(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 17
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 29
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 1(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a0, a0, 16
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a0, 30
 ; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a0, 0(sp)
-; RV32-BITS-UNKNOWN-NEXT:    mv a0, sp
-; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vle8.v v8, (a0)
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-UNKNOWN-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-UNKNOWN-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-UNKNOWN-NEXT:    addi sp, sp, 16
 ; RV32-BITS-UNKNOWN-NEXT:    ret
 ;
 ; RV32-BITS-256-LABEL: reverse_v16i1:
 ; RV32-BITS-256:       # %bb.0:
-; RV32-BITS-256-NEXT:    addi sp, sp, -16
-; RV32-BITS-256-NEXT:    .cfi_def_cfa_offset 16
-; RV32-BITS-256-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-256-NEXT:    vfirst.m a0, v0
-; RV32-BITS-256-NEXT:    seqz a0, a0
-; RV32-BITS-256-NEXT:    sb a0, 15(sp)
 ; RV32-BITS-256-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV32-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-256-NEXT:    slli a1, a0, 30
+; RV32-BITS-256-NEXT:    slli a1, a0, 16
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 14(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 29
+; RV32-BITS-256-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 17
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 13(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 28
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 18
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 12(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 27
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 19
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 11(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 26
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 20
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 10(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 25
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 21
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 9(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 24
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 22
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 8(sp)
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-BITS-256-NEXT:    slli a1, a0, 23
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 7(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 22
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 24
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 6(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 21
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 25
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 5(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 20
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 26
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 4(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 19
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 27
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 3(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 18
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 28
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 2(sp)
-; RV32-BITS-256-NEXT:    slli a1, a0, 17
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 29
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 1(sp)
-; RV32-BITS-256-NEXT:    slli a0, a0, 16
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a0, a0, 30
 ; RV32-BITS-256-NEXT:    srli a0, a0, 31
-; RV32-BITS-256-NEXT:    sb a0, 0(sp)
-; RV32-BITS-256-NEXT:    mv a0, sp
-; RV32-BITS-256-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-256-NEXT:    vle8.v v8, (a0)
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    vfirst.m a0, v0
+; RV32-BITS-256-NEXT:    seqz a0, a0
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-256-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-256-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-256-NEXT:    addi sp, sp, 16
 ; RV32-BITS-256-NEXT:    ret
 ;
 ; RV32-BITS-512-LABEL: reverse_v16i1:
 ; RV32-BITS-512:       # %bb.0:
-; RV32-BITS-512-NEXT:    addi sp, sp, -16
-; RV32-BITS-512-NEXT:    .cfi_def_cfa_offset 16
-; RV32-BITS-512-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-512-NEXT:    vfirst.m a0, v0
-; RV32-BITS-512-NEXT:    seqz a0, a0
-; RV32-BITS-512-NEXT:    sb a0, 15(sp)
 ; RV32-BITS-512-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV32-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV32-BITS-512-NEXT:    slli a1, a0, 30
+; RV32-BITS-512-NEXT:    slli a1, a0, 16
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 14(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 29
+; RV32-BITS-512-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 17
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 13(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 28
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 18
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 12(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 27
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 19
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 11(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 26
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 20
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 10(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 25
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 21
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 9(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 24
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 22
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 8(sp)
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-BITS-512-NEXT:    slli a1, a0, 23
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 7(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 22
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 24
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 6(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 21
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 25
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 5(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 20
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 26
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 4(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 19
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 27
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 3(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 18
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 28
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 2(sp)
-; RV32-BITS-512-NEXT:    slli a1, a0, 17
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 29
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 1(sp)
-; RV32-BITS-512-NEXT:    slli a0, a0, 16
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a0, a0, 30
 ; RV32-BITS-512-NEXT:    srli a0, a0, 31
-; RV32-BITS-512-NEXT:    sb a0, 0(sp)
-; RV32-BITS-512-NEXT:    mv a0, sp
-; RV32-BITS-512-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-512-NEXT:    vle8.v v8, (a0)
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    vfirst.m a0, v0
+; RV32-BITS-512-NEXT:    seqz a0, a0
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-512-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-512-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-512-NEXT:    addi sp, sp, 16
 ; RV32-BITS-512-NEXT:    ret
 ;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v16i1:
 ; RV64-BITS-UNKNOWN:       # %bb.0:
-; RV64-BITS-UNKNOWN-NEXT:    addi sp, sp, -16
-; RV64-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 16
-; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
-; RV64-BITS-UNKNOWN-NEXT:    seqz a0, a0
-; RV64-BITS-UNKNOWN-NEXT:    sb a0, 15(sp)
 ; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 62
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 48
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 61
+; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 49
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 60
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 50
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 12(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 59
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 51
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 58
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 52
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 57
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 53
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 56
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 54
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 8(sp)
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 55
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 7(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 54
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 56
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 6(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 53
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 57
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 5(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 52
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 58
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 4(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 51
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 59
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 3(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 50
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 60
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 2(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 49
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 61
 ; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 1(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 48
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 62
 ; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a0, 0(sp)
-; RV64-BITS-UNKNOWN-NEXT:    mv a0, sp
-; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vle8.v v8, (a0)
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-UNKNOWN-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-UNKNOWN-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-UNKNOWN-NEXT:    addi sp, sp, 16
 ; RV64-BITS-UNKNOWN-NEXT:    ret
 ;
 ; RV64-BITS-256-LABEL: reverse_v16i1:
 ; RV64-BITS-256:       # %bb.0:
-; RV64-BITS-256-NEXT:    addi sp, sp, -16
-; RV64-BITS-256-NEXT:    .cfi_def_cfa_offset 16
-; RV64-BITS-256-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-256-NEXT:    vfirst.m a0, v0
-; RV64-BITS-256-NEXT:    seqz a0, a0
-; RV64-BITS-256-NEXT:    sb a0, 15(sp)
 ; RV64-BITS-256-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV64-BITS-256-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-256-NEXT:    slli a1, a0, 62
+; RV64-BITS-256-NEXT:    slli a1, a0, 48
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 14(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 61
+; RV64-BITS-256-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 49
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 13(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 60
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 50
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 12(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 59
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 51
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 11(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 58
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 52
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 10(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 57
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 53
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 9(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 56
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 54
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 8(sp)
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-BITS-256-NEXT:    slli a1, a0, 55
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 7(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 54
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 56
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 6(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 53
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 57
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 5(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 52
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 58
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 4(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 51
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 59
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 3(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 50
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 60
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 2(sp)
-; RV64-BITS-256-NEXT:    slli a1, a0, 49
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 61
 ; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 1(sp)
-; RV64-BITS-256-NEXT:    slli a0, a0, 48
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a0, a0, 62
 ; RV64-BITS-256-NEXT:    srli a0, a0, 63
-; RV64-BITS-256-NEXT:    sb a0, 0(sp)
-; RV64-BITS-256-NEXT:    mv a0, sp
-; RV64-BITS-256-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-256-NEXT:    vle8.v v8, (a0)
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-256-NEXT:    vfirst.m a0, v0
+; RV64-BITS-256-NEXT:    seqz a0, a0
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-256-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-256-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-256-NEXT:    addi sp, sp, 16
 ; RV64-BITS-256-NEXT:    ret
 ;
 ; RV64-BITS-512-LABEL: reverse_v16i1:
 ; RV64-BITS-512:       # %bb.0:
-; RV64-BITS-512-NEXT:    addi sp, sp, -16
-; RV64-BITS-512-NEXT:    .cfi_def_cfa_offset 16
-; RV64-BITS-512-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-512-NEXT:    vfirst.m a0, v0
-; RV64-BITS-512-NEXT:    seqz a0, a0
-; RV64-BITS-512-NEXT:    sb a0, 15(sp)
 ; RV64-BITS-512-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV64-BITS-512-NEXT:    vmv.x.s a0, v0
-; RV64-BITS-512-NEXT:    slli a1, a0, 62
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 14(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 61
+; RV64-BITS-512-NEXT:    slli a1, a0, 48
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 13(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 60
+; RV64-BITS-512-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 49
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 12(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 59
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 50
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 11(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 58
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 51
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 10(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 57
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 52
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 9(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 56
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 53
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 54
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 8(sp)
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-BITS-512-NEXT:    slli a1, a0, 55
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 7(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 54
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 56
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 6(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 53
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 57
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 5(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 52
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 58
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 4(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 51
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 59
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 3(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 50
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 60
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 2(sp)
-; RV64-BITS-512-NEXT:    slli a1, a0, 49
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 61
 ; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 1(sp)
-; RV64-BITS-512-NEXT:    slli a0, a0, 48
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a0, a0, 62
 ; RV64-BITS-512-NEXT:    srli a0, a0, 63
-; RV64-BITS-512-NEXT:    sb a0, 0(sp)
-; RV64-BITS-512-NEXT:    mv a0, sp
-; RV64-BITS-512-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-512-NEXT:    vle8.v v8, (a0)
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-512-NEXT:    vfirst.m a0, v0
+; RV64-BITS-512-NEXT:    seqz a0, a0
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-512-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-512-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-512-NEXT:    addi sp, sp, 16
 ; RV64-BITS-512-NEXT:    ret
   %res = call <16 x i1> @llvm.experimental.vector.reverse.v16i1(<16 x i1> %a)
   ret <16 x i1> %res
@@ -673,728 +607,632 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
 define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
 ; RV32-BITS-UNKNOWN-LABEL: reverse_v32i1:
 ; RV32-BITS-UNKNOWN:       # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT:    addi sp, sp, -64
-; RV32-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 64
-; RV32-BITS-UNKNOWN-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; RV32-BITS-UNKNOWN-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; RV32-BITS-UNKNOWN-NEXT:    .cfi_offset ra, -4
-; RV32-BITS-UNKNOWN-NEXT:    .cfi_offset s0, -8
-; RV32-BITS-UNKNOWN-NEXT:    addi s0, sp, 64
-; RV32-BITS-UNKNOWN-NEXT:    .cfi_def_cfa s0, 0
-; RV32-BITS-UNKNOWN-NEXT:    andi sp, sp, -32
-; RV32-BITS-UNKNOWN-NEXT:    li a0, 32
-; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a1, v0
-; RV32-BITS-UNKNOWN-NEXT:    seqz a1, a1
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 31(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v0
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 0(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 30
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 30(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 29
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 29(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 28
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 28(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 27
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 27(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 26
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 26(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 25
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 25(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 24
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 24(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 23
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 23(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 22
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 22(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 21
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 21(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 20
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 20(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 19
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 19(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 18
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 18(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 17
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 17(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 16
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 16(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 15
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 15(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 14
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 13
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 12
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 12(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 11
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 10
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 9
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 8
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 8(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 7
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 7(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 6
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 6(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 5
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 5(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 4
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 4(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 3
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 3(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 2
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 2(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a1, 1
+; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    li a2, 32
+; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 1
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 2
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 3
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 4
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 5
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 6
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 7
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 8
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 9
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 10
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 11
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 12
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 13
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 14
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 15
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 16
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 17
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 18
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 19
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 20
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 21
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 22
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 23
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 24
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 25
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 26
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 27
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 28
 ; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 1(sp)
-; RV32-BITS-UNKNOWN-NEXT:    mv a1, sp
-; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vle8.v v8, (a1)
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a0, 29
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a0, 30
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-UNKNOWN-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-UNKNOWN-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-UNKNOWN-NEXT:    addi sp, s0, -64
-; RV32-BITS-UNKNOWN-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; RV32-BITS-UNKNOWN-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; RV32-BITS-UNKNOWN-NEXT:    addi sp, sp, 64
 ; RV32-BITS-UNKNOWN-NEXT:    ret
 ;
 ; RV32-BITS-256-LABEL: reverse_v32i1:
 ; RV32-BITS-256:       # %bb.0:
-; RV32-BITS-256-NEXT:    addi sp, sp, -64
-; RV32-BITS-256-NEXT:    .cfi_def_cfa_offset 64
-; RV32-BITS-256-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; RV32-BITS-256-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; RV32-BITS-256-NEXT:    .cfi_offset ra, -4
-; RV32-BITS-256-NEXT:    .cfi_offset s0, -8
-; RV32-BITS-256-NEXT:    addi s0, sp, 64
-; RV32-BITS-256-NEXT:    .cfi_def_cfa s0, 0
-; RV32-BITS-256-NEXT:    andi sp, sp, -32
-; RV32-BITS-256-NEXT:    li a0, 32
-; RV32-BITS-256-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-256-NEXT:    vfirst.m a1, v0
-; RV32-BITS-256-NEXT:    seqz a1, a1
-; RV32-BITS-256-NEXT:    sb a1, 31(sp)
 ; RV32-BITS-256-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-256-NEXT:    vmv.x.s a1, v0
-; RV32-BITS-256-NEXT:    srli a2, a1, 31
-; RV32-BITS-256-NEXT:    sb a2, 0(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 30
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 30(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 29
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 29(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 28
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 28(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 27
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 27(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 26
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 26(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 25
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 25(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 24
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 24(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 23
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 23(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 22
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 22(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 21
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 21(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 20
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 20(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 19
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 19(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 18
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 18(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 17
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 17(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 16
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 16(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 15
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 15(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 14
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 14(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 13
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 13(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 12
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 12(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 11
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 11(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 10
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 10(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 9
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 9(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 8
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 8(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 7
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 7(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 6
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 6(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 5
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 5(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 4
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 4(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 3
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 3(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 2
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 2(sp)
-; RV32-BITS-256-NEXT:    slli a1, a1, 1
+; RV32-BITS-256-NEXT:    vmv.x.s a0, v0
+; RV32-BITS-256-NEXT:    srli a1, a0, 31
+; RV32-BITS-256-NEXT:    li a2, 32
+; RV32-BITS-256-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 1
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 2
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 3
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 4
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 5
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 6
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 7
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 8
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 9
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 10
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 11
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 12
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 13
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 14
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 15
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 16
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 17
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 18
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 19
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 20
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 21
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 22
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 23
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 24
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 25
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 26
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 27
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 28
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a1, a0, 29
 ; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 1(sp)
-; RV32-BITS-256-NEXT:    mv a1, sp
-; RV32-BITS-256-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-256-NEXT:    vle8.v v8, (a1)
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    slli a0, a0, 30
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    vfirst.m a0, v0
+; RV32-BITS-256-NEXT:    seqz a0, a0
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-256-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-256-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-256-NEXT:    addi sp, s0, -64
-; RV32-BITS-256-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; RV32-BITS-256-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; RV32-BITS-256-NEXT:    addi sp, sp, 64
 ; RV32-BITS-256-NEXT:    ret
 ;
 ; RV32-BITS-512-LABEL: reverse_v32i1:
 ; RV32-BITS-512:       # %bb.0:
-; RV32-BITS-512-NEXT:    addi sp, sp, -64
-; RV32-BITS-512-NEXT:    .cfi_def_cfa_offset 64
-; RV32-BITS-512-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; RV32-BITS-512-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; RV32-BITS-512-NEXT:    .cfi_offset ra, -4
-; RV32-BITS-512-NEXT:    .cfi_offset s0, -8
-; RV32-BITS-512-NEXT:    addi s0, sp, 64
-; RV32-BITS-512-NEXT:    .cfi_def_cfa s0, 0
-; RV32-BITS-512-NEXT:    andi sp, sp, -32
-; RV32-BITS-512-NEXT:    li a0, 32
-; RV32-BITS-512-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-512-NEXT:    vfirst.m a1, v0
-; RV32-BITS-512-NEXT:    seqz a1, a1
-; RV32-BITS-512-NEXT:    sb a1, 31(sp)
 ; RV32-BITS-512-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-512-NEXT:    vmv.x.s a1, v0
-; RV32-BITS-512-NEXT:    srli a2, a1, 31
-; RV32-BITS-512-NEXT:    sb a2, 0(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 30
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 30(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 29
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 29(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 28
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 28(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 27
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 27(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 26
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 26(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 25
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 25(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 24
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 24(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 23
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 23(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 22
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 22(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 21
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 21(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 20
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 20(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 19
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 19(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 18
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 18(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 17
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 17(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 16
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 16(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 15
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 15(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 14
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 14(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 13
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 13(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 12
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 12(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 11
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 11(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 10
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 10(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 9
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 9(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 8
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 8(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 7
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 7(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 6
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 6(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 5
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 5(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 4
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 4(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 3
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 3(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 2
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 2(sp)
-; RV32-BITS-512-NEXT:    slli a1, a1, 1
+; RV32-BITS-512-NEXT:    vmv.x.s a0, v0
+; RV32-BITS-512-NEXT:    srli a1, a0, 31
+; RV32-BITS-512-NEXT:    li a2, 32
+; RV32-BITS-512-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 1
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 2
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 3
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 4
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 5
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 6
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 7
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 8
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 9
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 10
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 11
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 12
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 13
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 14
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 15
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 16
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 17
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 18
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 19
 ; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 1(sp)
-; RV32-BITS-512-NEXT:    mv a1, sp
-; RV32-BITS-512-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-512-NEXT:    vle8.v v8, (a1)
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 20
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 21
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 22
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 23
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 24
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 25
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 26
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 27
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 28
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a1, a0, 29
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    slli a0, a0, 30
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    vfirst.m a0, v0
+; RV32-BITS-512-NEXT:    seqz a0, a0
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-512-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-512-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-512-NEXT:    addi sp, s0, -64
-; RV32-BITS-512-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; RV32-BITS-512-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; RV32-BITS-512-NEXT:    addi sp, sp, 64
 ; RV32-BITS-512-NEXT:    ret
 ;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v32i1:
 ; RV64-BITS-UNKNOWN:       # %bb.0:
-; RV64-BITS-UNKNOWN-NEXT:    addi sp, sp, -64
-; RV64-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 64
-; RV64-BITS-UNKNOWN-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-BITS-UNKNOWN-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-BITS-UNKNOWN-NEXT:    .cfi_offset ra, -8
-; RV64-BITS-UNKNOWN-NEXT:    .cfi_offset s0, -16
-; RV64-BITS-UNKNOWN-NEXT:    addi s0, sp, 64
-; RV64-BITS-UNKNOWN-NEXT:    .cfi_def_cfa s0, 0
-; RV64-BITS-UNKNOWN-NEXT:    andi sp, sp, -32
-; RV64-BITS-UNKNOWN-NEXT:    li a0, 32
-; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a1, v0
-; RV64-BITS-UNKNOWN-NEXT:    seqz a1, a1
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 31(sp)
 ; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v0
-; RV64-BITS-UNKNOWN-NEXT:    srliw a2, a1, 31
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 0(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 62
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 30(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 61
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 29(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 60
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 28(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 59
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 27(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 58
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 26(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 57
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 25(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 56
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 24(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 55
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 23(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 54
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 22(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 53
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 21(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 52
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 20(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 51
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 19(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 50
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 18(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 49
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 17(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 48
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 16(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 47
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 15(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 46
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 45
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 44
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 12(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 43
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 42
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 41
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 40
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 8(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 39
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 7(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 38
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 6(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 37
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 5(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 36
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 4(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 35
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 3(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 34
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 2(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a1, 33
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 1(sp)
-; RV64-BITS-UNKNOWN-NEXT:    mv a1, sp
-; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vle8.v v8, (a1)
+; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
+; RV64-BITS-UNKNOWN-NEXT:    srliw a1, a0, 31
+; RV64-BITS-UNKNOWN-NEXT:    li a2, 32
+; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 33
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 34
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 35
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 36
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 37
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 38
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 39
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 40
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 41
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 42
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 43
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 44
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 45
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 46
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 47
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 48
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 49
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 50
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 51
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 52
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 53
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 54
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 55
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 56
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 57
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 58
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 59
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 60
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 61
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 62
+; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-UNKNOWN-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-UNKNOWN-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-UNKNOWN-NEXT:    addi sp, s0, -64
-; RV64-BITS-UNKNOWN-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-BITS-UNKNOWN-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-BITS-UNKNOWN-NEXT:    addi sp, sp, 64
 ; RV64-BITS-UNKNOWN-NEXT:    ret
 ;
 ; RV64-BITS-256-LABEL: reverse_v32i1:
 ; RV64-BITS-256:       # %bb.0:
-; RV64-BITS-256-NEXT:    addi sp, sp, -64
-; RV64-BITS-256-NEXT:    .cfi_def_cfa_offset 64
-; RV64-BITS-256-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-BITS-256-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-BITS-256-NEXT:    .cfi_offset ra, -8
-; RV64-BITS-256-NEXT:    .cfi_offset s0, -16
-; RV64-BITS-256-NEXT:    addi s0, sp, 64
-; RV64-BITS-256-NEXT:    .cfi_def_cfa s0, 0
-; RV64-BITS-256-NEXT:    andi sp, sp, -32
-; RV64-BITS-256-NEXT:    li a0, 32
-; RV64-BITS-256-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-256-NEXT:    vfirst.m a1, v0
-; RV64-BITS-256-NEXT:    seqz a1, a1
-; RV64-BITS-256-NEXT:    sb a1, 31(sp)
 ; RV64-BITS-256-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-256-NEXT:    vmv.x.s a1, v0
-; RV64-BITS-256-NEXT:    srliw a2, a1, 31
-; RV64-BITS-256-NEXT:    sb a2, 0(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 62
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 30(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 61
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 29(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 60
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 28(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 59
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 27(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 58
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 26(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 57
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 25(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 56
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 24(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 55
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 23(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 54
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 22(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 53
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 21(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 52
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 20(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 51
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 19(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 50
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 18(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 49
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 17(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 48
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 16(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 47
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 15(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 46
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 14(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 45
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 13(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 44
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 12(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 43
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 11(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 42
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 10(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 41
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 9(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 40
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 8(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 39
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 7(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 38
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 6(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 37
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 5(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 36
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 4(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 35
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 3(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 34
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 2(sp)
-; RV64-BITS-256-NEXT:    slli a1, a1, 33
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 1(sp)
-; RV64-BITS-256-NEXT:    mv a1, sp
-; RV64-BITS-256-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-256-NEXT:    vle8.v v8, (a1)
+; RV64-BITS-256-NEXT:    vmv.x.s a0, v0
+; RV64-BITS-256-NEXT:    srliw a1, a0, 31
+; RV64-BITS-256-NEXT:    li a2, 32
+; RV64-BITS-256-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 33
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 34
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 35
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 36
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 37
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 38
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 39
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 40
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 41
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 42
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 43
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 44
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 45
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 46
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 47
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 48
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 49
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 50
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 51
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 52
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 53
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 54
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 55
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 56
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 57
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 58
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 59
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 60
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 61
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a0, a0, 62
+; RV64-BITS-256-NEXT:    srli a0, a0, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-256-NEXT:    vfirst.m a0, v0
+; RV64-BITS-256-NEXT:    seqz a0, a0
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-256-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-256-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-256-NEXT:    addi sp, s0, -64
-; RV64-BITS-256-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-BITS-256-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-BITS-256-NEXT:    addi sp, sp, 64
 ; RV64-BITS-256-NEXT:    ret
 ;
 ; RV64-BITS-512-LABEL: reverse_v32i1:
 ; RV64-BITS-512:       # %bb.0:
-; RV64-BITS-512-NEXT:    addi sp, sp, -64
-; RV64-BITS-512-NEXT:    .cfi_def_cfa_offset 64
-; RV64-BITS-512-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-BITS-512-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-BITS-512-NEXT:    .cfi_offset ra, -8
-; RV64-BITS-512-NEXT:    .cfi_offset s0, -16
-; RV64-BITS-512-NEXT:    addi s0, sp, 64
-; RV64-BITS-512-NEXT:    .cfi_def_cfa s0, 0
-; RV64-BITS-512-NEXT:    andi sp, sp, -32
-; RV64-BITS-512-NEXT:    li a0, 32
-; RV64-BITS-512-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-512-NEXT:    vfirst.m a1, v0
-; RV64-BITS-512-NEXT:    seqz a1, a1
-; RV64-BITS-512-NEXT:    sb a1, 31(sp)
 ; RV64-BITS-512-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-512-NEXT:    vmv.x.s a1, v0
-; RV64-BITS-512-NEXT:    srliw a2, a1, 31
-; RV64-BITS-512-NEXT:    sb a2, 0(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 62
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 30(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 61
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 29(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 60
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 28(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 59
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 27(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 58
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 26(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 57
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 25(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 56
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 24(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 55
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 23(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 54
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 22(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 53
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 21(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 52
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 20(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 51
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 19(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 50
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 18(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 49
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 17(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 48
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 16(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 47
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 15(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 46
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 14(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 45
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 13(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 44
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 12(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 43
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 11(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 42
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 10(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 41
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 9(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 40
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 8(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 39
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 7(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 38
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 6(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 37
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 5(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 36
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 4(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 35
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 3(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 34
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 2(sp)
-; RV64-BITS-512-NEXT:    slli a1, a1, 33
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 1(sp)
-; RV64-BITS-512-NEXT:    mv a1, sp
-; RV64-BITS-512-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-512-NEXT:    vle8.v v8, (a1)
+; RV64-BITS-512-NEXT:    vmv.x.s a0, v0
+; RV64-BITS-512-NEXT:    srliw a1, a0, 31
+; RV64-BITS-512-NEXT:    li a2, 32
+; RV64-BITS-512-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 33
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 34
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 35
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 36
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 37
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 38
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 39
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 40
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 41
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 42
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 43
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 44
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 45
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 46
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 47
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 48
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 49
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 50
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 51
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 52
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 53
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 54
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 55
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 56
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 57
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 58
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 59
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 60
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 61
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a0, a0, 62
+; RV64-BITS-512-NEXT:    srli a0, a0, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-512-NEXT:    vfirst.m a0, v0
+; RV64-BITS-512-NEXT:    seqz a0, a0
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-512-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-512-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-512-NEXT:    addi sp, s0, -64
-; RV64-BITS-512-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-BITS-512-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-BITS-512-NEXT:    addi sp, sp, 64
 ; RV64-BITS-512-NEXT:    ret
   %res = call <32 x i1> @llvm.experimental.vector.reverse.v32i1(<32 x i1> %a)
   ret <32 x i1> %res
@@ -1403,1301 +1241,1211 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
 define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
 ; RV32-BITS-UNKNOWN-LABEL: reverse_v64i1:
 ; RV32-BITS-UNKNOWN:       # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT:    addi sp, sp, -128
-; RV32-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 128
-; RV32-BITS-UNKNOWN-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
-; RV32-BITS-UNKNOWN-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
-; RV32-BITS-UNKNOWN-NEXT:    .cfi_offset ra, -4
-; RV32-BITS-UNKNOWN-NEXT:    .cfi_offset s0, -8
-; RV32-BITS-UNKNOWN-NEXT:    addi s0, sp, 128
-; RV32-BITS-UNKNOWN-NEXT:    .cfi_def_cfa s0, 0
-; RV32-BITS-UNKNOWN-NEXT:    andi sp, sp, -64
-; RV32-BITS-UNKNOWN-NEXT:    li a0, 64
-; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a1, v0
-; RV32-BITS-UNKNOWN-NEXT:    seqz a1, a1
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 63(sp)
 ; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v0
+; RV32-BITS-UNKNOWN-NEXT:    vslidedown.vi v8, v0, 1
+; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v8
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 32(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 30
+; RV32-BITS-UNKNOWN-NEXT:    li a0, 64
+; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 1
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 62(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 29
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 2
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 61(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 28
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 3
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 60(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 27
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 4
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 59(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 26
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 5
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 58(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 25
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 6
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 57(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 24
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 7
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 56(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 23
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 8
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 55(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 22
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 9
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 54(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 21
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 10
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 53(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 20
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 11
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 52(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 19
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 12
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 51(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 18
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 50(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 17
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 49(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 16
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 48(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 15
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 47(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 14
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 46(sp)
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 13
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 45(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 12
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 44(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 11
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 43(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 10
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 42(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 9
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 41(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 8
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 40(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 7
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 39(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 6
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 38(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 5
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 37(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 4
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 36(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 3
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 35(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 2
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 34(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a1, 1
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 33(sp)
-; RV32-BITS-UNKNOWN-NEXT:    vslidedown.vi v8, v0, 1
-; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v8
-; RV32-BITS-UNKNOWN-NEXT:    andi a2, a1, 1
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 31(sp)
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 0(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 30
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 30(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 29
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 29(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 28
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 28(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 27
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 27(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 26
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 26(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 25
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 25(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 24
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 24(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 23
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 23(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 22
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 22(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 21
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 21(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 20
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 20(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 19
-; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 19(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 18
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 14
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 18(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 17
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 15
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 17(sp)
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 16
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 16(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 15
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 17
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 15(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 14
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 18
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 13
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 19
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 12
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 20
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 12(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 11
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 21
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 10
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 22
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 9
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 23
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 8
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 24
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 8(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 7
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 25
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 7(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 6
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 26
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 6(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 5
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 27
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 5(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 4
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 28
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 4(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 3
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 29
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 3(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 2
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a2, a1, 30
 ; RV32-BITS-UNKNOWN-NEXT:    srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a2, 2(sp)
-; RV32-BITS-UNKNOWN-NEXT:    slli a1, a1, 1
-; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT:    sb a1, 1(sp)
-; RV32-BITS-UNKNOWN-NEXT:    mv a1, sp
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    andi a1, a1, 1
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v0
+; RV32-BITS-UNKNOWN-NEXT:    srli a2, a1, 31
 ; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vle8.v v8, (a1)
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 1
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 2
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 3
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 4
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 5
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 6
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 7
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 8
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 9
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 10
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 11
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 12
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 13
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 14
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 15
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 16
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 17
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 18
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 19
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 20
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 21
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 22
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 23
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 24
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 25
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 26
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 27
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 28
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a0, a1, 29
+; RV32-BITS-UNKNOWN-NEXT:    srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT:    slli a1, a1, 30
+; RV32-BITS-UNKNOWN-NEXT:    srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-UNKNOWN-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-UNKNOWN-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-UNKNOWN-NEXT:    addi sp, s0, -128
-; RV32-BITS-UNKNOWN-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
-; RV32-BITS-UNKNOWN-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
-; RV32-BITS-UNKNOWN-NEXT:    addi sp, sp, 128
 ; RV32-BITS-UNKNOWN-NEXT:    ret
 ;
 ; RV32-BITS-256-LABEL: reverse_v64i1:
 ; RV32-BITS-256:       # %bb.0:
-; RV32-BITS-256-NEXT:    addi sp, sp, -128
-; RV32-BITS-256-NEXT:    .cfi_def_cfa_offset 128
-; RV32-BITS-256-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
-; RV32-BITS-256-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
-; RV32-BITS-256-NEXT:    .cfi_offset ra, -4
-; RV32-BITS-256-NEXT:    .cfi_offset s0, -8
-; RV32-BITS-256-NEXT:    addi s0, sp, 128
-; RV32-BITS-256-NEXT:    .cfi_def_cfa s0, 0
-; RV32-BITS-256-NEXT:    andi sp, sp, -64
-; RV32-BITS-256-NEXT:    li a0, 64
-; RV32-BITS-256-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-256-NEXT:    vfirst.m a1, v0
-; RV32-BITS-256-NEXT:    seqz a1, a1
-; RV32-BITS-256-NEXT:    sb a1, 63(sp)
 ; RV32-BITS-256-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-256-NEXT:    vmv.x.s a1, v0
-; RV32-BITS-256-NEXT:    srli a2, a1, 31
-; RV32-BITS-256-NEXT:    sb a2, 32(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 30
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 62(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 29
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 61(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 28
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 60(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 27
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 59(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 26
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 58(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 25
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 57(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 24
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 56(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 23
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 55(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 22
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 54(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 21
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 53(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 20
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 52(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 19
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 51(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 18
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 50(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 17
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 49(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 16
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 48(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 15
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 47(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 14
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 46(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 13
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 45(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 12
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 44(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 11
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 43(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 10
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 42(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 9
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 41(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 8
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 40(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 7
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 39(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 6
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 38(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 5
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 37(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 4
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 36(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 3
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 35(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 2
-; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 34(sp)
-; RV32-BITS-256-NEXT:    slli a1, a1, 1
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 33(sp)
 ; RV32-BITS-256-NEXT:    vslidedown.vi v8, v0, 1
 ; RV32-BITS-256-NEXT:    vmv.x.s a1, v8
-; RV32-BITS-256-NEXT:    andi a2, a1, 1
-; RV32-BITS-256-NEXT:    sb a2, 31(sp)
 ; RV32-BITS-256-NEXT:    srli a2, a1, 31
-; RV32-BITS-256-NEXT:    sb a2, 0(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 30
+; RV32-BITS-256-NEXT:    li a0, 64
+; RV32-BITS-256-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 1
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 30(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 29
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 2
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 29(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 28
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 3
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 28(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 27
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 4
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 27(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 26
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 5
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 26(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 25
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 6
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 25(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 24
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 7
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 24(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 23
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 8
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 23(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 22
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 9
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 22(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 21
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 10
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 21(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 20
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 11
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 20(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 19
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 12
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 19(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 18
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 13
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 18(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 17
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 14
+; RV32-BITS-256-NEXT:    srli a2, a2, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 15
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 17(sp)
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32-BITS-256-NEXT:    slli a2, a1, 16
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 16(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 15
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 17
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 15(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 14
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 18
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 14(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 13
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 19
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 13(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 12
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 20
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 12(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 11
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 21
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 11(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 10
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 22
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 10(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 9
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 23
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 9(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 8
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 24
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 8(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 7
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 25
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 7(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 6
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 26
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 6(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 5
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 27
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 5(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 4
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 28
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 4(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 3
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 29
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 3(sp)
-; RV32-BITS-256-NEXT:    slli a2, a1, 2
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a2, a1, 30
 ; RV32-BITS-256-NEXT:    srli a2, a2, 31
-; RV32-BITS-256-NEXT:    sb a2, 2(sp)
-; RV32-BITS-256-NEXT:    slli a1, a1, 1
-; RV32-BITS-256-NEXT:    srli a1, a1, 31
-; RV32-BITS-256-NEXT:    sb a1, 1(sp)
-; RV32-BITS-256-NEXT:    mv a1, sp
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    andi a1, a1, 1
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-256-NEXT:    vmv.x.s a1, v0
+; RV32-BITS-256-NEXT:    srli a2, a1, 31
 ; RV32-BITS-256-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-256-NEXT:    vle8.v v8, (a1)
-; RV32-BITS-256-NEXT:    vand.vi v8, v8, 1
-; RV32-BITS-256-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-256-NEXT:    addi sp, s0, -128
-; RV32-BITS-256-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
-; RV32-BITS-256-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
-; RV32-BITS-256-NEXT:    addi sp, sp, 128
-; RV32-BITS-256-NEXT:    ret
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT:    slli a0, a1, 1
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 2
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 3
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 4
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 5
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 6
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 7
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 8
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 9
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 10
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 11
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 12
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 13
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 14
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 15
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 16
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 17
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 18
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 19
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 20
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 21
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 22
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 23
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 24
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 25
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 26
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 27
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 28
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a0, a1, 29
+; RV32-BITS-256-NEXT:    srli a0, a0, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    slli a1, a1, 30
+; RV32-BITS-256-NEXT:    srli a1, a1, 31
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT:    vfirst.m a0, v0
+; RV32-BITS-256-NEXT:    seqz a0, a0
+; RV32-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT:    vand.vi v8, v8, 1
+; RV32-BITS-256-NEXT:    vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT:    ret
 ;
 ; RV32-BITS-512-LABEL: reverse_v64i1:
 ; RV32-BITS-512:       # %bb.0:
-; RV32-BITS-512-NEXT:    addi sp, sp, -128
-; RV32-BITS-512-NEXT:    .cfi_def_cfa_offset 128
-; RV32-BITS-512-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
-; RV32-BITS-512-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
-; RV32-BITS-512-NEXT:    .cfi_offset ra, -4
-; RV32-BITS-512-NEXT:    .cfi_offset s0, -8
-; RV32-BITS-512-NEXT:    addi s0, sp, 128
-; RV32-BITS-512-NEXT:    .cfi_def_cfa s0, 0
-; RV32-BITS-512-NEXT:    andi sp, sp, -64
-; RV32-BITS-512-NEXT:    li a0, 64
-; RV32-BITS-512-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-512-NEXT:    vfirst.m a1, v0
-; RV32-BITS-512-NEXT:    seqz a1, a1
-; RV32-BITS-512-NEXT:    sb a1, 63(sp)
 ; RV32-BITS-512-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-512-NEXT:    vmv.x.s a1, v0
-; RV32-BITS-512-NEXT:    srli a2, a1, 31
-; RV32-BITS-512-NEXT:    sb a2, 32(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 30
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 62(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 29
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 61(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 28
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 60(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 27
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 59(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 26
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 58(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 25
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 57(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 24
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 56(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 23
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 55(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 22
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 54(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 21
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 53(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 20
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 52(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 19
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 51(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 18
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 50(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 17
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 49(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 16
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 48(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 15
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 47(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 14
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 46(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 13
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 45(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 12
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 44(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 11
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 43(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 10
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 42(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 9
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 41(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 8
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 40(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 7
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 39(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 6
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 38(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 5
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 37(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 4
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 36(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 3
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 35(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 2
-; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 34(sp)
-; RV32-BITS-512-NEXT:    slli a1, a1, 1
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 33(sp)
 ; RV32-BITS-512-NEXT:    vslidedown.vi v8, v0, 1
 ; RV32-BITS-512-NEXT:    vmv.x.s a1, v8
-; RV32-BITS-512-NEXT:    andi a2, a1, 1
-; RV32-BITS-512-NEXT:    sb a2, 31(sp)
 ; RV32-BITS-512-NEXT:    srli a2, a1, 31
-; RV32-BITS-512-NEXT:    sb a2, 0(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 30
+; RV32-BITS-512-NEXT:    li a0, 64
+; RV32-BITS-512-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 1
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 30(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 29
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 2
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 29(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 28
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 3
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 28(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 27
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 4
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 27(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 26
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 5
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 26(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 25
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 6
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 25(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 24
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 7
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 24(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 23
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 8
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 23(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 22
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 9
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 22(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 21
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 10
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 21(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 20
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 11
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 20(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 19
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 12
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 19(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 18
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 13
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 18(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 17
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 14
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 17(sp)
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 15
+; RV32-BITS-512-NEXT:    srli a2, a2, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32-BITS-512-NEXT:    slli a2, a1, 16
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 16(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 15
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 17
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 15(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 14
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 18
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 14(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 13
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 19
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 13(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 12
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 20
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 12(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 11
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 21
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 11(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 10
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 22
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 10(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 9
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 23
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 9(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 8
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 24
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 8(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 7
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 25
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 7(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 6
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 26
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 6(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 5
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 27
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 5(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 4
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 28
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 4(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 3
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 29
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 3(sp)
-; RV32-BITS-512-NEXT:    slli a2, a1, 2
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a2, a1, 30
 ; RV32-BITS-512-NEXT:    srli a2, a2, 31
-; RV32-BITS-512-NEXT:    sb a2, 2(sp)
-; RV32-BITS-512-NEXT:    slli a1, a1, 1
-; RV32-BITS-512-NEXT:    srli a1, a1, 31
-; RV32-BITS-512-NEXT:    sb a1, 1(sp)
-; RV32-BITS-512-NEXT:    mv a1, sp
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    andi a1, a1, 1
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-512-NEXT:    vmv.x.s a1, v0
+; RV32-BITS-512-NEXT:    srli a2, a1, 31
 ; RV32-BITS-512-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-512-NEXT:    vle8.v v8, (a1)
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT:    slli a0, a1, 1
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 2
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 3
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 4
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 5
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 6
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 7
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 8
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 9
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 10
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 11
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 12
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 13
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 14
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 15
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 16
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 17
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 18
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 19
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 20
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 21
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 22
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 23
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 24
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 25
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 26
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 27
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 28
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a0, a1, 29
+; RV32-BITS-512-NEXT:    srli a0, a0, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT:    slli a1, a1, 30
+; RV32-BITS-512-NEXT:    srli a1, a1, 31
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT:    vfirst.m a0, v0
+; RV32-BITS-512-NEXT:    seqz a0, a0
+; RV32-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-BITS-512-NEXT:    vand.vi v8, v8, 1
 ; RV32-BITS-512-NEXT:    vmsne.vi v0, v8, 0
-; RV32-BITS-512-NEXT:    addi sp, s0, -128
-; RV32-BITS-512-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
-; RV32-BITS-512-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
-; RV32-BITS-512-NEXT:    addi sp, sp, 128
 ; RV32-BITS-512-NEXT:    ret
 ;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v64i1:
 ; RV64-BITS-UNKNOWN:       # %bb.0:
-; RV64-BITS-UNKNOWN-NEXT:    addi sp, sp, -128
-; RV64-BITS-UNKNOWN-NEXT:    .cfi_def_cfa_offset 128
-; RV64-BITS-UNKNOWN-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-BITS-UNKNOWN-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-BITS-UNKNOWN-NEXT:    .cfi_offset ra, -8
-; RV64-BITS-UNKNOWN-NEXT:    .cfi_offset s0, -16
-; RV64-BITS-UNKNOWN-NEXT:    addi s0, sp, 128
-; RV64-BITS-UNKNOWN-NEXT:    .cfi_def_cfa s0, 0
-; RV64-BITS-UNKNOWN-NEXT:    andi sp, sp, -64
-; RV64-BITS-UNKNOWN-NEXT:    li a0, 64
-; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a1, v0
-; RV64-BITS-UNKNOWN-NEXT:    seqz a1, a1
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 63(sp)
 ; RV64-BITS-UNKNOWN-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a1, v0
-; RV64-BITS-UNKNOWN-NEXT:    srliw a2, a1, 31
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 32(sp)
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 0(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 62
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 62(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 61
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 61(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 60
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 60(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 59
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 59(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 58
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 58(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 57
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 57(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 56
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 56(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 55
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 55(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 54
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 54(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 53
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 53(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 52
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 52(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 51
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 51(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 50
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 50(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 49
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 49(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 48
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 48(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 47
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 47(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 46
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 46(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 45
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 45(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 44
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 44(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 43
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 43(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 42
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 42(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 41
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 41(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 40
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 40(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 39
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 39(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 38
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 38(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 37
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 37(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 36
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 36(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 35
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 35(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 34
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 34(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 33
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 33(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 31
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 31(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 30
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 30(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 29
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 29(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 28
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 28(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 27
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 27(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 26
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 26(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 25
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 25(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 24
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 24(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 23
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 23(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 22
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 22(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 21
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 21(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 20
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 20(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 19
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 19(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 18
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 18(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 17
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 17(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 16
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 16(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 15
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 15(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 14
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 13
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 12
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 12(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 11
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 10
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 9
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 8
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 8(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 7
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 7(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 6
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 6(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 5
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 5(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 4
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 4(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 3
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 3(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a2, a1, 2
-; RV64-BITS-UNKNOWN-NEXT:    srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a2, 2(sp)
-; RV64-BITS-UNKNOWN-NEXT:    slli a1, a1, 1
-; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT:    sb a1, 1(sp)
-; RV64-BITS-UNKNOWN-NEXT:    mv a1, sp
-; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-UNKNOWN-NEXT:    vle8.v v8, (a1)
+; RV64-BITS-UNKNOWN-NEXT:    vmv.x.s a0, v0
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a0, 63
+; RV64-BITS-UNKNOWN-NEXT:    li a2, 64
+; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 1
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 2
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 3
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 4
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 5
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 6
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 7
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 8
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 9
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 10
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 11
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 12
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 13
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 14
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 15
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 16
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 17
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 18
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 19
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 20
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 21
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 22
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 23
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 24
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 25
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 26
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 27
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 28
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 29
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 30
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 31
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    srliw a1, a0, 31
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 33
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 34
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 35
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 36
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 37
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 38
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 39
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 40
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 41
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 42
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 43
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 44
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 45
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 46
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 47
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 48
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 49
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 50
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 51
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 52
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 53
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 54
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 55
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 56
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 57
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 58
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 59
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 60
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a1, a0, 61
+; RV64-BITS-UNKNOWN-NEXT:    srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 62
+; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 63
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-UNKNOWN-NEXT:    vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT:    seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-UNKNOWN-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-UNKNOWN-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-UNKNOWN-NEXT:    addi sp, s0, -128
-; RV64-BITS-UNKNOWN-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-BITS-UNKNOWN-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-BITS-UNKNOWN-NEXT:    addi sp, sp, 128
 ; RV64-BITS-UNKNOWN-NEXT:    ret
 ;
 ; RV64-BITS-256-LABEL: reverse_v64i1:
 ; RV64-BITS-256:       # %bb.0:
-; RV64-BITS-256-NEXT:    addi sp, sp, -128
-; RV64-BITS-256-NEXT:    .cfi_def_cfa_offset 128
-; RV64-BITS-256-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-BITS-256-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-BITS-256-NEXT:    .cfi_offset ra, -8
-; RV64-BITS-256-NEXT:    .cfi_offset s0, -16
-; RV64-BITS-256-NEXT:    addi s0, sp, 128
-; RV64-BITS-256-NEXT:    .cfi_def_cfa s0, 0
-; RV64-BITS-256-NEXT:    andi sp, sp, -64
-; RV64-BITS-256-NEXT:    li a0, 64
-; RV64-BITS-256-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-256-NEXT:    vfirst.m a1, v0
-; RV64-BITS-256-NEXT:    seqz a1, a1
-; RV64-BITS-256-NEXT:    sb a1, 63(sp)
 ; RV64-BITS-256-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-256-NEXT:    vmv.x.s a1, v0
-; RV64-BITS-256-NEXT:    srliw a2, a1, 31
-; RV64-BITS-256-NEXT:    sb a2, 32(sp)
-; RV64-BITS-256-NEXT:    srli a2, a1, 63
-; RV64-BITS-256-NEXT:    sb a2, 0(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 62
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 62(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 61
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 61(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 60
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 60(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 59
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 59(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 58
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 58(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 57
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 57(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 56
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 56(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 55
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 55(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 54
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 54(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 53
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 53(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 52
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 52(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 51
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 51(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 50
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 50(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 49
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 49(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 48
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 48(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 47
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 47(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 46
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 46(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 45
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 45(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 44
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 44(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 43
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 43(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 42
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 42(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 41
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 41(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 40
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 40(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 39
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 39(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 38
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 38(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 37
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 37(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 36
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 36(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 35
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 35(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 34
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 34(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 33
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 33(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 31
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 31(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 30
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 30(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 29
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 29(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 28
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 28(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 27
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 27(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 26
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 26(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 25
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 25(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 24
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 24(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 23
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 23(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 22
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 22(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 21
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 21(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 20
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 20(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 19
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 19(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 18
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 18(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 17
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 17(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 16
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 16(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 15
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 15(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 14
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 14(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 13
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 13(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 12
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 12(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 11
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 11(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 10
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 10(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 9
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 9(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 8
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 8(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 7
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 7(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 6
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 6(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 5
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 5(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 4
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 4(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 3
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 3(sp)
-; RV64-BITS-256-NEXT:    slli a2, a1, 2
-; RV64-BITS-256-NEXT:    srli a2, a2, 63
-; RV64-BITS-256-NEXT:    sb a2, 2(sp)
-; RV64-BITS-256-NEXT:    slli a1, a1, 1
-; RV64-BITS-256-NEXT:    srli a1, a1, 63
-; RV64-BITS-256-NEXT:    sb a1, 1(sp)
-; RV64-BITS-256-NEXT:    mv a1, sp
-; RV64-BITS-256-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-256-NEXT:    vle8.v v8, (a1)
+; RV64-BITS-256-NEXT:    vmv.x.s a0, v0
+; RV64-BITS-256-NEXT:    srli a1, a0, 63
+; RV64-BITS-256-NEXT:    li a2, 64
+; RV64-BITS-256-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 1
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 2
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 3
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 4
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 5
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 6
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 7
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 8
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 9
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 10
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 11
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 12
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 13
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 14
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 15
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 16
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 17
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 18
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 19
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 20
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 21
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 22
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 23
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 24
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 25
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 26
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 27
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 28
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 29
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 30
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 31
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    srliw a1, a0, 31
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 33
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 34
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 35
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 36
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 37
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 38
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 39
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 40
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 41
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 42
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 43
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 44
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 45
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 46
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 47
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 48
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 49
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 50
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 51
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 52
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 53
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 54
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 55
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 56
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 57
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 58
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 59
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 60
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a1, a0, 61
+; RV64-BITS-256-NEXT:    srli a1, a1, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT:    slli a0, a0, 62
+; RV64-BITS-256-NEXT:    srli a0, a0, 63
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-256-NEXT:    vfirst.m a0, v0
+; RV64-BITS-256-NEXT:    seqz a0, a0
+; RV64-BITS-256-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-256-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-256-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-256-NEXT:    addi sp, s0, -128
-; RV64-BITS-256-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-BITS-256-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-BITS-256-NEXT:    addi sp, sp, 128
 ; RV64-BITS-256-NEXT:    ret
 ;
 ; RV64-BITS-512-LABEL: reverse_v64i1:
 ; RV64-BITS-512:       # %bb.0:
-; RV64-BITS-512-NEXT:    addi sp, sp, -128
-; RV64-BITS-512-NEXT:    .cfi_def_cfa_offset 128
-; RV64-BITS-512-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-BITS-512-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-BITS-512-NEXT:    .cfi_offset ra, -8
-; RV64-BITS-512-NEXT:    .cfi_offset s0, -16
-; RV64-BITS-512-NEXT:    addi s0, sp, 128
-; RV64-BITS-512-NEXT:    .cfi_def_cfa s0, 0
-; RV64-BITS-512-NEXT:    andi sp, sp, -64
-; RV64-BITS-512-NEXT:    li a0, 64
-; RV64-BITS-512-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-512-NEXT:    vfirst.m a1, v0
-; RV64-BITS-512-NEXT:    seqz a1, a1
-; RV64-BITS-512-NEXT:    sb a1, 63(sp)
 ; RV64-BITS-512-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-512-NEXT:    vmv.x.s a1, v0
-; RV64-BITS-512-NEXT:    srliw a2, a1, 31
-; RV64-BITS-512-NEXT:    sb a2, 32(sp)
-; RV64-BITS-512-NEXT:    srli a2, a1, 63
-; RV64-BITS-512-NEXT:    sb a2, 0(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 62
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 62(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 61
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 61(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 60
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 60(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 59
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 59(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 58
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 58(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 57
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 57(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 56
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 56(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 55
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 55(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 54
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 54(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 53
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 53(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 52
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 52(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 51
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 51(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 50
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 50(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 49
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 49(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 48
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 48(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 47
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 47(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 46
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 46(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 45
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 45(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 44
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 44(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 43
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 43(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 42
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 42(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 41
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 41(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 40
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 40(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 39
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 39(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 38
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 38(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 37
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 37(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 36
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 36(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 35
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 35(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 34
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 34(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 33
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 33(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 31
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 31(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 30
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 30(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 29
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 29(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 28
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 28(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 27
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 27(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 26
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 26(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 25
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 25(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 24
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 24(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 23
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 23(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 22
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 22(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 21
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 21(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 20
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 20(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 19
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 19(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 18
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 18(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 17
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 17(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 16
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 16(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 15
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 15(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 14
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 14(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 13
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 13(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 12
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 12(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 11
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 11(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 10
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 10(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 9
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 9(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 8
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 8(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 7
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 7(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 6
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 6(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 5
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 5(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 4
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 4(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 3
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 3(sp)
-; RV64-BITS-512-NEXT:    slli a2, a1, 2
-; RV64-BITS-512-NEXT:    srli a2, a2, 63
-; RV64-BITS-512-NEXT:    sb a2, 2(sp)
-; RV64-BITS-512-NEXT:    slli a1, a1, 1
-; RV64-BITS-512-NEXT:    srli a1, a1, 63
-; RV64-BITS-512-NEXT:    sb a1, 1(sp)
-; RV64-BITS-512-NEXT:    mv a1, sp
-; RV64-BITS-512-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-512-NEXT:    vle8.v v8, (a1)
+; RV64-BITS-512-NEXT:    vmv.x.s a0, v0
+; RV64-BITS-512-NEXT:    srli a1, a0, 63
+; RV64-BITS-512-NEXT:    li a2, 64
+; RV64-BITS-512-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 1
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 2
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 3
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 4
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 5
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 6
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 7
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 8
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 9
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 10
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 11
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 12
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 13
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 14
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 15
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 16
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 17
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 18
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 19
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 20
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 21
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 22
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 23
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 24
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 25
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 26
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 27
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 28
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 29
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 30
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 31
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    srliw a1, a0, 31
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 33
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 34
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 35
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 36
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 37
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 38
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 39
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 40
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 41
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 42
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 43
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 44
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 45
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 46
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 47
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 48
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 49
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 50
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 51
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 52
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 53
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 54
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 55
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 56
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 57
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 58
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 59
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 60
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a1, a0, 61
+; RV64-BITS-512-NEXT:    srli a1, a1, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT:    slli a0, a0, 62
+; RV64-BITS-512-NEXT:    srli a0, a0, 63
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-BITS-512-NEXT:    vfirst.m a0, v0
+; RV64-BITS-512-NEXT:    seqz a0, a0
+; RV64-BITS-512-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-BITS-512-NEXT:    vand.vi v8, v8, 1
 ; RV64-BITS-512-NEXT:    vmsne.vi v0, v8, 0
-; RV64-BITS-512-NEXT:    addi sp, s0, -128
-; RV64-BITS-512-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-BITS-512-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-BITS-512-NEXT:    addi sp, sp, 128
 ; RV64-BITS-512-NEXT:    ret
   %res = call <64 x i1> @llvm.experimental.vector.reverse.v64i1(<64 x i1> %a)
   ret <64 x i1> %res

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index ad5f11eb6e78..fdca1aaf6bc8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -310,11 +310,11 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ;
 ; RV32-LABEL: fp2si_v8f64_v8i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    vfmv.f.s fa3, v8
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 1
+; RV32-NEXT:    vfmv.f.s fa3, v12
 ; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
 ; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
 ; RV32-NEXT:    lui a0, %hi(.LCPI12_1)
@@ -325,63 +325,73 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV32-NEXT:    fmin.d fa3, fa3, fa4
 ; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
 ; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    sb a0, 8(sp)
-; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v8, 7
-; RV32-NEXT:    vfmv.f.s fa3, v12
-; RV32-NEXT:    feq.d a0, fa3, fa3
-; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    vfmv.f.s fa3, v8
+; RV32-NEXT:    feq.d a2, fa3, fa3
+; RV32-NEXT:    neg a2, a2
 ; RV32-NEXT:    fmax.d fa3, fa3, fa5
 ; RV32-NEXT:    fmin.d fa3, fa3, fa4
-; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
-; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    sb a0, 15(sp)
-; RV32-NEXT:    vslidedown.vi v12, v8, 6
-; RV32-NEXT:    vfmv.f.s fa3, v12
+; RV32-NEXT:    fcvt.w.d a3, fa3, rtz
+; RV32-NEXT:    and a2, a2, a3
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v8, a2
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 2
+; RV32-NEXT:    vfmv.f.s fa3, v16
 ; RV32-NEXT:    feq.d a0, fa3, fa3
 ; RV32-NEXT:    neg a0, a0
 ; RV32-NEXT:    fmax.d fa3, fa3, fa5
 ; RV32-NEXT:    fmin.d fa3, fa3, fa4
 ; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
 ; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    sb a0, 14(sp)
-; RV32-NEXT:    vslidedown.vi v12, v8, 5
-; RV32-NEXT:    vfmv.f.s fa3, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 3
+; RV32-NEXT:    vfmv.f.s fa3, v16
 ; RV32-NEXT:    feq.d a0, fa3, fa3
 ; RV32-NEXT:    neg a0, a0
 ; RV32-NEXT:    fmax.d fa3, fa3, fa5
 ; RV32-NEXT:    fmin.d fa3, fa3, fa4
 ; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
 ; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    sb a0, 13(sp)
-; RV32-NEXT:    vslidedown.vi v12, v8, 4
-; RV32-NEXT:    vfmv.f.s fa3, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 4
+; RV32-NEXT:    vfmv.f.s fa3, v16
 ; RV32-NEXT:    feq.d a0, fa3, fa3
 ; RV32-NEXT:    neg a0, a0
 ; RV32-NEXT:    fmax.d fa3, fa3, fa5
 ; RV32-NEXT:    fmin.d fa3, fa3, fa4
 ; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
 ; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    sb a0, 12(sp)
-; RV32-NEXT:    vslidedown.vi v12, v8, 3
-; RV32-NEXT:    vfmv.f.s fa3, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 5
+; RV32-NEXT:    vfmv.f.s fa3, v16
 ; RV32-NEXT:    feq.d a0, fa3, fa3
 ; RV32-NEXT:    neg a0, a0
 ; RV32-NEXT:    fmax.d fa3, fa3, fa5
 ; RV32-NEXT:    fmin.d fa3, fa3, fa4
 ; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
 ; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    sb a0, 11(sp)
-; RV32-NEXT:    vslidedown.vi v12, v8, 2
-; RV32-NEXT:    vfmv.f.s fa3, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 6
+; RV32-NEXT:    vfmv.f.s fa3, v16
 ; RV32-NEXT:    feq.d a0, fa3, fa3
 ; RV32-NEXT:    neg a0, a0
 ; RV32-NEXT:    fmax.d fa3, fa3, fa5
 ; RV32-NEXT:    fmin.d fa3, fa3, fa4
 ; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
 ; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    sb a0, 10(sp)
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v8, 7
 ; RV32-NEXT:    vfmv.f.s fa3, v8
 ; RV32-NEXT:    feq.d a0, fa3, fa3
 ; RV32-NEXT:    neg a0, a0
@@ -389,101 +399,105 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV32-NEXT:    fmin.d fa5, fa5, fa4
 ; RV32-NEXT:    fcvt.w.d a2, fa5, rtz
 ; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    sb a0, 9(sp)
-; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
+; RV32-NEXT:    vslide1down.vx v8, v12, a0
 ; RV32-NEXT:    vse8.v v8, (a1)
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: fp2si_v8f64_v8i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    vfmv.f.s fa3, v8
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 1
+; RV64-NEXT:    vfmv.f.s fa3, v12
 ; RV64-NEXT:    lui a0, %hi(.LCPI12_0)
 ; RV64-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
 ; RV64-NEXT:    lui a0, %hi(.LCPI12_1)
 ; RV64-NEXT:    fld fa4, %lo(.LCPI12_1)(a0)
 ; RV64-NEXT:    feq.d a0, fa3, fa3
-; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    neg a0, a0
 ; RV64-NEXT:    fmax.d fa3, fa3, fa5
 ; RV64-NEXT:    fmin.d fa3, fa3, fa4
 ; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
 ; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    sb a0, 8(sp)
-; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v8, 7
-; RV64-NEXT:    vfmv.f.s fa3, v12
-; RV64-NEXT:    feq.d a0, fa3, fa3
-; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    vfmv.f.s fa3, v8
+; RV64-NEXT:    feq.d a2, fa3, fa3
+; RV64-NEXT:    neg a2, a2
 ; RV64-NEXT:    fmax.d fa3, fa3, fa5
 ; RV64-NEXT:    fmin.d fa3, fa3, fa4
-; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
-; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    sb a0, 15(sp)
-; RV64-NEXT:    vslidedown.vi v12, v8, 6
-; RV64-NEXT:    vfmv.f.s fa3, v12
+; RV64-NEXT:    fcvt.l.d a3, fa3, rtz
+; RV64-NEXT:    and a2, a2, a3
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v8, a2
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 2
+; RV64-NEXT:    vfmv.f.s fa3, v16
 ; RV64-NEXT:    feq.d a0, fa3, fa3
-; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    neg a0, a0
 ; RV64-NEXT:    fmax.d fa3, fa3, fa5
 ; RV64-NEXT:    fmin.d fa3, fa3, fa4
 ; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
 ; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    sb a0, 14(sp)
-; RV64-NEXT:    vslidedown.vi v12, v8, 5
-; RV64-NEXT:    vfmv.f.s fa3, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 3
+; RV64-NEXT:    vfmv.f.s fa3, v16
 ; RV64-NEXT:    feq.d a0, fa3, fa3
-; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    neg a0, a0
 ; RV64-NEXT:    fmax.d fa3, fa3, fa5
 ; RV64-NEXT:    fmin.d fa3, fa3, fa4
 ; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
 ; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    sb a0, 13(sp)
-; RV64-NEXT:    vslidedown.vi v12, v8, 4
-; RV64-NEXT:    vfmv.f.s fa3, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 4
+; RV64-NEXT:    vfmv.f.s fa3, v16
 ; RV64-NEXT:    feq.d a0, fa3, fa3
-; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    neg a0, a0
 ; RV64-NEXT:    fmax.d fa3, fa3, fa5
 ; RV64-NEXT:    fmin.d fa3, fa3, fa4
 ; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
 ; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    sb a0, 12(sp)
-; RV64-NEXT:    vslidedown.vi v12, v8, 3
-; RV64-NEXT:    vfmv.f.s fa3, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 5
+; RV64-NEXT:    vfmv.f.s fa3, v16
 ; RV64-NEXT:    feq.d a0, fa3, fa3
-; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    neg a0, a0
 ; RV64-NEXT:    fmax.d fa3, fa3, fa5
 ; RV64-NEXT:    fmin.d fa3, fa3, fa4
 ; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
 ; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    sb a0, 11(sp)
-; RV64-NEXT:    vslidedown.vi v12, v8, 2
-; RV64-NEXT:    vfmv.f.s fa3, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 6
+; RV64-NEXT:    vfmv.f.s fa3, v16
 ; RV64-NEXT:    feq.d a0, fa3, fa3
-; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    neg a0, a0
 ; RV64-NEXT:    fmax.d fa3, fa3, fa5
 ; RV64-NEXT:    fmin.d fa3, fa3, fa4
 ; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
 ; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    sb a0, 10(sp)
-; RV64-NEXT:    vslidedown.vi v8, v8, 1
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v8, 7
 ; RV64-NEXT:    vfmv.f.s fa3, v8
 ; RV64-NEXT:    feq.d a0, fa3, fa3
-; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    neg a0, a0
 ; RV64-NEXT:    fmax.d fa5, fa3, fa5
 ; RV64-NEXT:    fmin.d fa5, fa5, fa4
 ; RV64-NEXT:    fcvt.l.d a2, fa5, rtz
 ; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    sb a0, 9(sp)
-; RV64-NEXT:    addi a0, sp, 8
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
+; RV64-NEXT:    vslide1down.vx v8, v12, a0
 ; RV64-NEXT:    vse8.v v8, (a1)
-; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %d = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> %a)
@@ -496,8 +510,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ;
 ; RV32-LABEL: fp2ui_v8f64_v8i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
 ; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
@@ -507,61 +519,69 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT:    sb a0, 8(sp)
+; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v8, a0
 ; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v8, 7
-; RV32-NEXT:    vfmv.f.s fa4, v12
+; RV32-NEXT:    vslidedown.vi v16, v8, 1
+; RV32-NEXT:    vfmv.f.s fa4, v16
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT:    sb a0, 15(sp)
-; RV32-NEXT:    vslidedown.vi v12, v8, 6
-; RV32-NEXT:    vfmv.f.s fa4, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 2
+; RV32-NEXT:    vfmv.f.s fa4, v16
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT:    sb a0, 14(sp)
-; RV32-NEXT:    vslidedown.vi v12, v8, 5
-; RV32-NEXT:    vfmv.f.s fa4, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 3
+; RV32-NEXT:    vfmv.f.s fa4, v16
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT:    sb a0, 13(sp)
-; RV32-NEXT:    vslidedown.vi v12, v8, 4
-; RV32-NEXT:    vfmv.f.s fa4, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 4
+; RV32-NEXT:    vfmv.f.s fa4, v16
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT:    sb a0, 12(sp)
-; RV32-NEXT:    vslidedown.vi v12, v8, 3
-; RV32-NEXT:    vfmv.f.s fa4, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 5
+; RV32-NEXT:    vfmv.f.s fa4, v16
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT:    sb a0, 11(sp)
-; RV32-NEXT:    vslidedown.vi v12, v8, 2
-; RV32-NEXT:    vfmv.f.s fa4, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 6
+; RV32-NEXT:    vfmv.f.s fa4, v16
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT:    sb a0, 10(sp)
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v8, 7
 ; RV32-NEXT:    vfmv.f.s fa4, v8
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa5, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa5, rtz
-; RV32-NEXT:    sb a0, 9(sp)
-; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
+; RV32-NEXT:    vslide1down.vx v8, v12, a0
 ; RV32-NEXT:    vse8.v v8, (a1)
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: fp2ui_v8f64_v8i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
@@ -571,55 +591,65 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    sb a0, 8(sp)
+; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v8, a0
 ; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v8, 7
-; RV64-NEXT:    vfmv.f.s fa4, v12
+; RV64-NEXT:    vslidedown.vi v16, v8, 1
+; RV64-NEXT:    vfmv.f.s fa4, v16
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    sb a0, 15(sp)
-; RV64-NEXT:    vslidedown.vi v12, v8, 6
-; RV64-NEXT:    vfmv.f.s fa4, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 2
+; RV64-NEXT:    vfmv.f.s fa4, v16
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    sb a0, 14(sp)
-; RV64-NEXT:    vslidedown.vi v12, v8, 5
-; RV64-NEXT:    vfmv.f.s fa4, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 3
+; RV64-NEXT:    vfmv.f.s fa4, v16
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    sb a0, 13(sp)
-; RV64-NEXT:    vslidedown.vi v12, v8, 4
-; RV64-NEXT:    vfmv.f.s fa4, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 4
+; RV64-NEXT:    vfmv.f.s fa4, v16
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    sb a0, 12(sp)
-; RV64-NEXT:    vslidedown.vi v12, v8, 3
-; RV64-NEXT:    vfmv.f.s fa4, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 5
+; RV64-NEXT:    vfmv.f.s fa4, v16
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    sb a0, 11(sp)
-; RV64-NEXT:    vslidedown.vi v12, v8, 2
-; RV64-NEXT:    vfmv.f.s fa4, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 6
+; RV64-NEXT:    vfmv.f.s fa4, v16
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    sb a0, 10(sp)
-; RV64-NEXT:    vslidedown.vi v8, v8, 1
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v8, 7
 ; RV64-NEXT:    vfmv.f.s fa4, v8
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa5, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
-; RV64-NEXT:    sb a0, 9(sp)
-; RV64-NEXT:    addi a0, sp, 8
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
+; RV64-NEXT:    vslide1down.vx v8, v12, a0
 ; RV64-NEXT:    vse8.v v8, (a1)
-; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %d = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> %a)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index 9951864e6525..110cf81a6716 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -216,86 +216,70 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) {
 define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
 ; LMULMAX8RV32-LABEL: si2fp_v3i7_v3f32:
 ; LMULMAX8RV32:       # %bb.0:
-; LMULMAX8RV32-NEXT:    addi sp, sp, -16
-; LMULMAX8RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX8RV32-NEXT:    lw a1, 8(a0)
-; LMULMAX8RV32-NEXT:    sb a1, 14(sp)
-; LMULMAX8RV32-NEXT:    lw a1, 4(a0)
-; LMULMAX8RV32-NEXT:    sb a1, 13(sp)
-; LMULMAX8RV32-NEXT:    lw a0, 0(a0)
-; LMULMAX8RV32-NEXT:    sb a0, 12(sp)
-; LMULMAX8RV32-NEXT:    addi a0, sp, 12
+; LMULMAX8RV32-NEXT:    lw a1, 0(a0)
 ; LMULMAX8RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32-NEXT:    vle8.v v8, (a0)
+; LMULMAX8RV32-NEXT:    lw a2, 4(a0)
+; LMULMAX8RV32-NEXT:    lw a0, 8(a0)
+; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a1
+; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a2
+; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a0
+; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; LMULMAX8RV32-NEXT:    vadd.vv v8, v8, v8
 ; LMULMAX8RV32-NEXT:    vsra.vi v8, v8, 1
 ; LMULMAX8RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; LMULMAX8RV32-NEXT:    vsext.vf2 v9, v8
 ; LMULMAX8RV32-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX8RV32-NEXT:    addi sp, sp, 16
 ; LMULMAX8RV32-NEXT:    ret
 ;
 ; LMULMAX8RV64-LABEL: si2fp_v3i7_v3f32:
 ; LMULMAX8RV64:       # %bb.0:
-; LMULMAX8RV64-NEXT:    addi sp, sp, -16
-; LMULMAX8RV64-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX8RV64-NEXT:    ld a1, 16(a0)
-; LMULMAX8RV64-NEXT:    sb a1, 14(sp)
-; LMULMAX8RV64-NEXT:    ld a1, 8(a0)
-; LMULMAX8RV64-NEXT:    sb a1, 13(sp)
-; LMULMAX8RV64-NEXT:    ld a0, 0(a0)
-; LMULMAX8RV64-NEXT:    sb a0, 12(sp)
-; LMULMAX8RV64-NEXT:    addi a0, sp, 12
+; LMULMAX8RV64-NEXT:    ld a1, 0(a0)
 ; LMULMAX8RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64-NEXT:    vle8.v v8, (a0)
+; LMULMAX8RV64-NEXT:    ld a2, 8(a0)
+; LMULMAX8RV64-NEXT:    ld a0, 16(a0)
+; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a1
+; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a2
+; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a0
+; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a0
 ; LMULMAX8RV64-NEXT:    vadd.vv v8, v8, v8
 ; LMULMAX8RV64-NEXT:    vsra.vi v8, v8, 1
 ; LMULMAX8RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; LMULMAX8RV64-NEXT:    vsext.vf2 v9, v8
 ; LMULMAX8RV64-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX8RV64-NEXT:    addi sp, sp, 16
 ; LMULMAX8RV64-NEXT:    ret
 ;
 ; LMULMAX1RV32-LABEL: si2fp_v3i7_v3f32:
 ; LMULMAX1RV32:       # %bb.0:
-; LMULMAX1RV32-NEXT:    addi sp, sp, -16
-; LMULMAX1RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX1RV32-NEXT:    lw a1, 8(a0)
-; LMULMAX1RV32-NEXT:    sb a1, 14(sp)
-; LMULMAX1RV32-NEXT:    lw a1, 4(a0)
-; LMULMAX1RV32-NEXT:    sb a1, 13(sp)
-; LMULMAX1RV32-NEXT:    lw a0, 0(a0)
-; LMULMAX1RV32-NEXT:    sb a0, 12(sp)
-; LMULMAX1RV32-NEXT:    addi a0, sp, 12
+; LMULMAX1RV32-NEXT:    lw a1, 0(a0)
 ; LMULMAX1RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV32-NEXT:    vle8.v v8, (a0)
+; LMULMAX1RV32-NEXT:    lw a2, 4(a0)
+; LMULMAX1RV32-NEXT:    lw a0, 8(a0)
+; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a1
+; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a2
+; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a0
+; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; LMULMAX1RV32-NEXT:    vadd.vv v8, v8, v8
 ; LMULMAX1RV32-NEXT:    vsra.vi v8, v8, 1
 ; LMULMAX1RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; LMULMAX1RV32-NEXT:    vsext.vf2 v9, v8
 ; LMULMAX1RV32-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX1RV32-NEXT:    addi sp, sp, 16
 ; LMULMAX1RV32-NEXT:    ret
 ;
 ; LMULMAX1RV64-LABEL: si2fp_v3i7_v3f32:
 ; LMULMAX1RV64:       # %bb.0:
-; LMULMAX1RV64-NEXT:    addi sp, sp, -16
-; LMULMAX1RV64-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX1RV64-NEXT:    ld a1, 16(a0)
-; LMULMAX1RV64-NEXT:    sb a1, 14(sp)
-; LMULMAX1RV64-NEXT:    ld a1, 8(a0)
-; LMULMAX1RV64-NEXT:    sb a1, 13(sp)
-; LMULMAX1RV64-NEXT:    ld a0, 0(a0)
-; LMULMAX1RV64-NEXT:    sb a0, 12(sp)
-; LMULMAX1RV64-NEXT:    addi a0, sp, 12
+; LMULMAX1RV64-NEXT:    ld a1, 0(a0)
 ; LMULMAX1RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV64-NEXT:    vle8.v v8, (a0)
+; LMULMAX1RV64-NEXT:    ld a2, 8(a0)
+; LMULMAX1RV64-NEXT:    ld a0, 16(a0)
+; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a1
+; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a2
+; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a0
+; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a0
 ; LMULMAX1RV64-NEXT:    vadd.vv v8, v8, v8
 ; LMULMAX1RV64-NEXT:    vsra.vi v8, v8, 1
 ; LMULMAX1RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; LMULMAX1RV64-NEXT:    vsext.vf2 v9, v8
 ; LMULMAX1RV64-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX1RV64-NEXT:    addi sp, sp, 16
 ; LMULMAX1RV64-NEXT:    ret
   %z = sitofp <3 x i7> %x to <3 x float>
   ret <3 x float> %z
@@ -305,86 +289,70 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
 define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
 ; LMULMAX8RV32-LABEL: ui2fp_v3i7_v3f32:
 ; LMULMAX8RV32:       # %bb.0:
-; LMULMAX8RV32-NEXT:    addi sp, sp, -16
-; LMULMAX8RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX8RV32-NEXT:    lw a1, 8(a0)
-; LMULMAX8RV32-NEXT:    sb a1, 14(sp)
-; LMULMAX8RV32-NEXT:    lw a1, 4(a0)
-; LMULMAX8RV32-NEXT:    sb a1, 13(sp)
-; LMULMAX8RV32-NEXT:    lw a0, 0(a0)
-; LMULMAX8RV32-NEXT:    sb a0, 12(sp)
-; LMULMAX8RV32-NEXT:    addi a0, sp, 12
+; LMULMAX8RV32-NEXT:    lw a1, 0(a0)
 ; LMULMAX8RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32-NEXT:    vle8.v v8, (a0)
+; LMULMAX8RV32-NEXT:    lw a2, 4(a0)
+; LMULMAX8RV32-NEXT:    lw a0, 8(a0)
+; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a1
+; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a2
+; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a0
+; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; LMULMAX8RV32-NEXT:    li a0, 127
 ; LMULMAX8RV32-NEXT:    vand.vx v8, v8, a0
 ; LMULMAX8RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; LMULMAX8RV32-NEXT:    vzext.vf2 v9, v8
 ; LMULMAX8RV32-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX8RV32-NEXT:    addi sp, sp, 16
 ; LMULMAX8RV32-NEXT:    ret
 ;
 ; LMULMAX8RV64-LABEL: ui2fp_v3i7_v3f32:
 ; LMULMAX8RV64:       # %bb.0:
-; LMULMAX8RV64-NEXT:    addi sp, sp, -16
-; LMULMAX8RV64-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX8RV64-NEXT:    ld a1, 16(a0)
-; LMULMAX8RV64-NEXT:    sb a1, 14(sp)
-; LMULMAX8RV64-NEXT:    ld a1, 8(a0)
-; LMULMAX8RV64-NEXT:    sb a1, 13(sp)
-; LMULMAX8RV64-NEXT:    ld a0, 0(a0)
-; LMULMAX8RV64-NEXT:    sb a0, 12(sp)
-; LMULMAX8RV64-NEXT:    addi a0, sp, 12
+; LMULMAX8RV64-NEXT:    ld a1, 0(a0)
 ; LMULMAX8RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64-NEXT:    vle8.v v8, (a0)
+; LMULMAX8RV64-NEXT:    ld a2, 8(a0)
+; LMULMAX8RV64-NEXT:    ld a0, 16(a0)
+; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a1
+; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a2
+; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a0
+; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a0
 ; LMULMAX8RV64-NEXT:    li a0, 127
 ; LMULMAX8RV64-NEXT:    vand.vx v8, v8, a0
 ; LMULMAX8RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; LMULMAX8RV64-NEXT:    vzext.vf2 v9, v8
 ; LMULMAX8RV64-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX8RV64-NEXT:    addi sp, sp, 16
 ; LMULMAX8RV64-NEXT:    ret
 ;
 ; LMULMAX1RV32-LABEL: ui2fp_v3i7_v3f32:
 ; LMULMAX1RV32:       # %bb.0:
-; LMULMAX1RV32-NEXT:    addi sp, sp, -16
-; LMULMAX1RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX1RV32-NEXT:    lw a1, 8(a0)
-; LMULMAX1RV32-NEXT:    sb a1, 14(sp)
-; LMULMAX1RV32-NEXT:    lw a1, 4(a0)
-; LMULMAX1RV32-NEXT:    sb a1, 13(sp)
-; LMULMAX1RV32-NEXT:    lw a0, 0(a0)
-; LMULMAX1RV32-NEXT:    sb a0, 12(sp)
-; LMULMAX1RV32-NEXT:    addi a0, sp, 12
+; LMULMAX1RV32-NEXT:    lw a1, 0(a0)
 ; LMULMAX1RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV32-NEXT:    vle8.v v8, (a0)
+; LMULMAX1RV32-NEXT:    lw a2, 4(a0)
+; LMULMAX1RV32-NEXT:    lw a0, 8(a0)
+; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a1
+; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a2
+; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a0
+; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; LMULMAX1RV32-NEXT:    li a0, 127
 ; LMULMAX1RV32-NEXT:    vand.vx v8, v8, a0
 ; LMULMAX1RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; LMULMAX1RV32-NEXT:    vzext.vf2 v9, v8
 ; LMULMAX1RV32-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX1RV32-NEXT:    addi sp, sp, 16
 ; LMULMAX1RV32-NEXT:    ret
 ;
 ; LMULMAX1RV64-LABEL: ui2fp_v3i7_v3f32:
 ; LMULMAX1RV64:       # %bb.0:
-; LMULMAX1RV64-NEXT:    addi sp, sp, -16
-; LMULMAX1RV64-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX1RV64-NEXT:    ld a1, 16(a0)
-; LMULMAX1RV64-NEXT:    sb a1, 14(sp)
-; LMULMAX1RV64-NEXT:    ld a1, 8(a0)
-; LMULMAX1RV64-NEXT:    sb a1, 13(sp)
-; LMULMAX1RV64-NEXT:    ld a0, 0(a0)
-; LMULMAX1RV64-NEXT:    sb a0, 12(sp)
-; LMULMAX1RV64-NEXT:    addi a0, sp, 12
+; LMULMAX1RV64-NEXT:    ld a1, 0(a0)
 ; LMULMAX1RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV64-NEXT:    vle8.v v8, (a0)
+; LMULMAX1RV64-NEXT:    ld a2, 8(a0)
+; LMULMAX1RV64-NEXT:    ld a0, 16(a0)
+; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a1
+; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a2
+; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a0
+; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a0
 ; LMULMAX1RV64-NEXT:    li a0, 127
 ; LMULMAX1RV64-NEXT:    vand.vx v8, v8, a0
 ; LMULMAX1RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; LMULMAX1RV64-NEXT:    vzext.vf2 v9, v8
 ; LMULMAX1RV64-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX1RV64-NEXT:    addi sp, sp, 16
 ; LMULMAX1RV64-NEXT:    ret
   %z = uitofp <3 x i7> %x to <3 x float>
   ret <3 x float> %z

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
index 56af96e6c630..c2aa59000f66 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -21,8 +21,6 @@ define <5 x i8> @load_v5i8(ptr %p) {
 define <5 x i8> @load_v5i8_align1(ptr %p) {
 ; RV32-LABEL: load_v5i8_align1:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    lbu a1, 1(a0)
 ; RV32-NEXT:    lbu a2, 0(a0)
 ; RV32-NEXT:    lbu a3, 2(a0)
@@ -35,29 +33,28 @@ define <5 x i8> @load_v5i8_align1(ptr %p) {
 ; RV32-NEXT:    or a1, a3, a1
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV32-NEXT:    vmv.s.x v8, a1
+; RV32-NEXT:    vslidedown.vi v9, v8, 3
+; RV32-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v9
+; RV32-NEXT:    vslidedown.vi v9, v8, 2
+; RV32-NEXT:    vmv.x.s a2, v9
 ; RV32-NEXT:    vslidedown.vi v9, v8, 1
-; RV32-NEXT:    vslidedown.vi v10, v8, 2
-; RV32-NEXT:    vslidedown.vi v11, v8, 3
+; RV32-NEXT:    vmv.x.s a3, v9
+; RV32-NEXT:    vmv.x.s a4, v8
 ; RV32-NEXT:    lb a0, 4(a0)
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; RV32-NEXT:    vse8.v v8, (a1)
-; RV32-NEXT:    addi a2, sp, 11
-; RV32-NEXT:    vse8.v v11, (a2)
-; RV32-NEXT:    addi a2, sp, 10
-; RV32-NEXT:    vse8.v v10, (a2)
-; RV32-NEXT:    addi a2, sp, 9
-; RV32-NEXT:    vse8.v v9, (a2)
-; RV32-NEXT:    sb a0, 12(sp)
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vle8.v v8, (a1)
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    vslide1down.vx v8, v8, a4
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_v5i8_align1:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    lbu a1, 1(a0)
 ; RV64-NEXT:    lbu a2, 0(a0)
 ; RV64-NEXT:    lbu a3, 2(a0)
@@ -70,23 +67,24 @@ define <5 x i8> @load_v5i8_align1(ptr %p) {
 ; RV64-NEXT:    or a1, a3, a1
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV64-NEXT:    vmv.s.x v8, a1
+; RV64-NEXT:    vslidedown.vi v9, v8, 3
+; RV64-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v9
+; RV64-NEXT:    vslidedown.vi v9, v8, 2
+; RV64-NEXT:    vmv.x.s a2, v9
 ; RV64-NEXT:    vslidedown.vi v9, v8, 1
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    vslidedown.vi v11, v8, 3
+; RV64-NEXT:    vmv.x.s a3, v9
+; RV64-NEXT:    vmv.x.s a4, v8
 ; RV64-NEXT:    lb a0, 4(a0)
-; RV64-NEXT:    addi a1, sp, 8
-; RV64-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; RV64-NEXT:    vse8.v v8, (a1)
-; RV64-NEXT:    addi a2, sp, 11
-; RV64-NEXT:    vse8.v v11, (a2)
-; RV64-NEXT:    addi a2, sp, 10
-; RV64-NEXT:    vse8.v v10, (a2)
-; RV64-NEXT:    addi a2, sp, 9
-; RV64-NEXT:    vse8.v v9, (a2)
-; RV64-NEXT:    sb a0, 12(sp)
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vle8.v v8, (a1)
-; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    vslide1down.vx v8, v8, a4
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-NEXT:    ret
   %x = load <5 x i8>, ptr %p, align 1
   ret <5 x i8> %x
@@ -180,60 +178,54 @@ define <6 x double> @load_v6f64(ptr %p) {
 define <6 x i1> @load_v6i1(ptr %p) {
 ; RV32-LABEL: load_v6i1:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    lbu a0, 0(a0)
-; RV32-NEXT:    slli a1, a0, 30
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    slli a2, a0, 29
+; RV32-NEXT:    srli a1, a0, 5
+; RV32-NEXT:    slli a2, a0, 27
 ; RV32-NEXT:    srli a2, a2, 31
 ; RV32-NEXT:    slli a3, a0, 28
 ; RV32-NEXT:    srli a3, a3, 31
-; RV32-NEXT:    slli a4, a0, 27
+; RV32-NEXT:    slli a4, a0, 29
 ; RV32-NEXT:    srli a4, a4, 31
-; RV32-NEXT:    andi a5, a0, 1
-; RV32-NEXT:    srli a0, a0, 5
-; RV32-NEXT:    sb a0, 13(sp)
-; RV32-NEXT:    sb a5, 8(sp)
-; RV32-NEXT:    sb a4, 12(sp)
-; RV32-NEXT:    sb a3, 11(sp)
-; RV32-NEXT:    sb a2, 10(sp)
-; RV32-NEXT:    sb a1, 9(sp)
-; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    slli a5, a0, 30
+; RV32-NEXT:    srli a5, a5, 31
+; RV32-NEXT:    andi a0, a0, 1
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a5
+; RV32-NEXT:    vslide1down.vx v8, v8, a4
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-NEXT:    vand.vi v8, v8, 1
 ; RV32-NEXT:    vmsne.vi v0, v8, 0
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_v6i1:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    lbu a0, 0(a0)
-; RV64-NEXT:    slli a1, a0, 62
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    slli a2, a0, 61
+; RV64-NEXT:    srli a1, a0, 5
+; RV64-NEXT:    slli a2, a0, 59
 ; RV64-NEXT:    srli a2, a2, 63
 ; RV64-NEXT:    slli a3, a0, 60
 ; RV64-NEXT:    srli a3, a3, 63
-; RV64-NEXT:    slli a4, a0, 59
+; RV64-NEXT:    slli a4, a0, 61
 ; RV64-NEXT:    srli a4, a4, 63
-; RV64-NEXT:    andi a5, a0, 1
-; RV64-NEXT:    srli a0, a0, 5
-; RV64-NEXT:    sb a0, 13(sp)
-; RV64-NEXT:    sb a5, 8(sp)
-; RV64-NEXT:    sb a4, 12(sp)
-; RV64-NEXT:    sb a3, 11(sp)
-; RV64-NEXT:    sb a2, 10(sp)
-; RV64-NEXT:    sb a1, 9(sp)
-; RV64-NEXT:    addi a0, sp, 8
+; RV64-NEXT:    slli a5, a0, 62
+; RV64-NEXT:    srli a5, a5, 63
+; RV64-NEXT:    andi a0, a0, 1
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a5
+; RV64-NEXT:    vslide1down.vx v8, v8, a4
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-NEXT:    vand.vi v8, v8, 1
 ; RV64-NEXT:    vmsne.vi v0, v8, 0
-; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %x = load <6 x i1>, ptr %p
   ret <6 x i1> %x

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index 27e991852893..68fb15a58258 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -81,30 +81,20 @@ define <2 x i1> @buildvec_mask_nonconst_v2i1(i1 %x, i1 %y) {
 define <2 x i1> @buildvec_mask_optsize_nonconst_v2i1(i1 %x, i1 %y) optsize {
 ; CHECK-LABEL: buildvec_mask_optsize_nonconst_v2i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    sb a1, 15(sp)
-; CHECK-NEXT:    sb a0, 14(sp)
-; CHECK-NEXT:    addi a0, sp, 14
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v2i1:
 ; ZVE32F:       # %bb.0:
-; ZVE32F-NEXT:    addi sp, sp, -16
-; ZVE32F-NEXT:    .cfi_def_cfa_offset 16
-; ZVE32F-NEXT:    sb a1, 15(sp)
-; ZVE32F-NEXT:    sb a0, 14(sp)
-; ZVE32F-NEXT:    addi a0, sp, 14
 ; ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; ZVE32F-NEXT:    vle8.v v8, (a0)
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVE32F-NEXT:    vand.vi v8, v8, 1
 ; ZVE32F-NEXT:    vmsne.vi v0, v8, 0
-; ZVE32F-NEXT:    addi sp, sp, 16
 ; ZVE32F-NEXT:    ret
   %1 = insertelement <2 x i1> poison, i1 %x, i32 0
   %2 = insertelement <2 x i1> %1,  i1 %y, i32 1
@@ -195,34 +185,24 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) {
 define <4 x i1> @buildvec_mask_optsize_nonconst_v4i1(i1 %x, i1 %y) optsize {
 ; CHECK-LABEL: buildvec_mask_optsize_nonconst_v4i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    sb a1, 15(sp)
-; CHECK-NEXT:    sb a1, 14(sp)
-; CHECK-NEXT:    sb a0, 13(sp)
-; CHECK-NEXT:    sb a0, 12(sp)
-; CHECK-NEXT:    addi a0, sp, 12
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v4i1:
 ; ZVE32F:       # %bb.0:
-; ZVE32F-NEXT:    addi sp, sp, -16
-; ZVE32F-NEXT:    .cfi_def_cfa_offset 16
-; ZVE32F-NEXT:    sb a1, 15(sp)
-; ZVE32F-NEXT:    sb a1, 14(sp)
-; ZVE32F-NEXT:    sb a0, 13(sp)
-; ZVE32F-NEXT:    sb a0, 12(sp)
-; ZVE32F-NEXT:    addi a0, sp, 12
 ; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32F-NEXT:    vle8.v v8, (a0)
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVE32F-NEXT:    vand.vi v8, v8, 1
 ; ZVE32F-NEXT:    vmsne.vi v0, v8, 0
-; ZVE32F-NEXT:    addi sp, sp, 16
 ; ZVE32F-NEXT:    ret
   %1 = insertelement <4 x i1> poison, i1 %x, i32 0
   %2 = insertelement <4 x i1> %1,  i1 %x, i32 1
@@ -234,36 +214,26 @@ define <4 x i1> @buildvec_mask_optsize_nonconst_v4i1(i1 %x, i1 %y) optsize {
 define <4 x i1> @buildvec_mask_nonconst_v4i1_2(i1 %x, i1 %y) {
 ; CHECK-LABEL: buildvec_mask_nonconst_v4i1_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    sb a1, 15(sp)
-; CHECK-NEXT:    li a1, 1
-; CHECK-NEXT:    sb a1, 14(sp)
-; CHECK-NEXT:    sb a0, 13(sp)
-; CHECK-NEXT:    sb zero, 12(sp)
-; CHECK-NEXT:    addi a0, sp, 12
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1_2:
 ; ZVE32F:       # %bb.0:
-; ZVE32F-NEXT:    addi sp, sp, -16
-; ZVE32F-NEXT:    .cfi_def_cfa_offset 16
-; ZVE32F-NEXT:    sb a1, 15(sp)
-; ZVE32F-NEXT:    li a1, 1
-; ZVE32F-NEXT:    sb a1, 14(sp)
-; ZVE32F-NEXT:    sb a0, 13(sp)
-; ZVE32F-NEXT:    sb zero, 12(sp)
-; ZVE32F-NEXT:    addi a0, sp, 12
 ; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32F-NEXT:    vle8.v v8, (a0)
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, zero
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    li a0, 1
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVE32F-NEXT:    vand.vi v8, v8, 1
 ; ZVE32F-NEXT:    vmsne.vi v0, v8, 0
-; ZVE32F-NEXT:    addi sp, sp, 16
 ; ZVE32F-NEXT:    ret
   %1 = insertelement <4 x i1> poison, i1 0, i32 0
   %2 = insertelement <4 x i1> %1,  i1 %x, i32 1
@@ -325,44 +295,34 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
 define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
 ; CHECK-LABEL: buildvec_mask_nonconst_v8i1_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    sb a2, 15(sp)
-; CHECK-NEXT:    sb zero, 14(sp)
-; CHECK-NEXT:    sb a3, 13(sp)
-; CHECK-NEXT:    sb a0, 12(sp)
-; CHECK-NEXT:    sb a1, 11(sp)
-; CHECK-NEXT:    li a1, 1
-; CHECK-NEXT:    sb a1, 10(sp)
-; CHECK-NEXT:    sb a0, 9(sp)
-; CHECK-NEXT:    sb a0, 8(sp)
-; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    li a4, 1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslide1down.vx v8, v8, zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1_2:
 ; ZVE32F:       # %bb.0:
-; ZVE32F-NEXT:    addi sp, sp, -16
-; ZVE32F-NEXT:    .cfi_def_cfa_offset 16
-; ZVE32F-NEXT:    sb a2, 15(sp)
-; ZVE32F-NEXT:    sb zero, 14(sp)
-; ZVE32F-NEXT:    sb a3, 13(sp)
-; ZVE32F-NEXT:    sb a0, 12(sp)
-; ZVE32F-NEXT:    sb a1, 11(sp)
-; ZVE32F-NEXT:    li a1, 1
-; ZVE32F-NEXT:    sb a1, 10(sp)
-; ZVE32F-NEXT:    sb a0, 9(sp)
-; ZVE32F-NEXT:    sb a0, 8(sp)
-; ZVE32F-NEXT:    addi a0, sp, 8
 ; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; ZVE32F-NEXT:    vle8.v v8, (a0)
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    li a4, 1
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a4
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, zero
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
 ; ZVE32F-NEXT:    vand.vi v8, v8, 1
 ; ZVE32F-NEXT:    vmsne.vi v0, v8, 0
-; ZVE32F-NEXT:    addi sp, sp, 16
 ; ZVE32F-NEXT:    ret
   %1 = insertelement <8 x i1> poison, i1 %x, i32 0
   %2 = insertelement <8 x i1> %1,  i1 %x, i32 1
@@ -378,44 +338,34 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
 define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) optsize {
 ; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    sb a2, 15(sp)
-; CHECK-NEXT:    sb zero, 14(sp)
-; CHECK-NEXT:    sb a3, 13(sp)
-; CHECK-NEXT:    sb a0, 12(sp)
-; CHECK-NEXT:    sb a1, 11(sp)
-; CHECK-NEXT:    li a1, 1
-; CHECK-NEXT:    sb a1, 10(sp)
-; CHECK-NEXT:    sb a0, 9(sp)
-; CHECK-NEXT:    sb a0, 8(sp)
-; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    li a4, 1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslide1down.vx v8, v8, zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1_2:
 ; ZVE32F:       # %bb.0:
-; ZVE32F-NEXT:    addi sp, sp, -16
-; ZVE32F-NEXT:    .cfi_def_cfa_offset 16
-; ZVE32F-NEXT:    sb a2, 15(sp)
-; ZVE32F-NEXT:    sb zero, 14(sp)
-; ZVE32F-NEXT:    sb a3, 13(sp)
-; ZVE32F-NEXT:    sb a0, 12(sp)
-; ZVE32F-NEXT:    sb a1, 11(sp)
-; ZVE32F-NEXT:    li a1, 1
-; ZVE32F-NEXT:    sb a1, 10(sp)
-; ZVE32F-NEXT:    sb a0, 9(sp)
-; ZVE32F-NEXT:    sb a0, 8(sp)
-; ZVE32F-NEXT:    addi a0, sp, 8
 ; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; ZVE32F-NEXT:    vle8.v v8, (a0)
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    li a4, 1
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a4
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, zero
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
 ; ZVE32F-NEXT:    vand.vi v8, v8, 1
 ; ZVE32F-NEXT:    vmsne.vi v0, v8, 0
-; ZVE32F-NEXT:    addi sp, sp, 16
 ; ZVE32F-NEXT:    ret
   %1 = insertelement <8 x i1> poison, i1 %x, i32 0
   %2 = insertelement <8 x i1> %1,  i1 %x, i32 1
@@ -431,42 +381,32 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
 define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
 ; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    sb a1, 15(sp)
-; CHECK-NEXT:    sb a1, 14(sp)
-; CHECK-NEXT:    sb a1, 13(sp)
-; CHECK-NEXT:    sb a0, 12(sp)
-; CHECK-NEXT:    sb a1, 11(sp)
-; CHECK-NEXT:    sb a1, 10(sp)
-; CHECK-NEXT:    sb a0, 9(sp)
-; CHECK-NEXT:    sb a0, 8(sp)
-; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1:
 ; ZVE32F:       # %bb.0:
-; ZVE32F-NEXT:    addi sp, sp, -16
-; ZVE32F-NEXT:    .cfi_def_cfa_offset 16
-; ZVE32F-NEXT:    sb a1, 15(sp)
-; ZVE32F-NEXT:    sb a1, 14(sp)
-; ZVE32F-NEXT:    sb a1, 13(sp)
-; ZVE32F-NEXT:    sb a0, 12(sp)
-; ZVE32F-NEXT:    sb a1, 11(sp)
-; ZVE32F-NEXT:    sb a1, 10(sp)
-; ZVE32F-NEXT:    sb a0, 9(sp)
-; ZVE32F-NEXT:    sb a0, 8(sp)
-; ZVE32F-NEXT:    addi a0, sp, 8
 ; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; ZVE32F-NEXT:    vle8.v v8, (a0)
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVE32F-NEXT:    vand.vi v8, v8, 1
 ; ZVE32F-NEXT:    vmsne.vi v0, v8, 0
-; ZVE32F-NEXT:    addi sp, sp, 16
 ; ZVE32F-NEXT:    ret
   %1 = insertelement <8 x i1> poison, i1 %x, i32 0
   %2 = insertelement <8 x i1> %1,  i1 %x, i32 1

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 1b26fba4df76..a30afd02bdd5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -6775,191 +6775,156 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
 ;
 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i64:
 ; RV32ZVE32F:       # %bb.0:
-; RV32ZVE32F-NEXT:    addi sp, sp, -96
-; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 96
-; RV32ZVE32F-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s2, 84(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s3, 80(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s4, 76(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s5, 72(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s6, 68(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s7, 64(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s8, 60(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s9, 56(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s10, 52(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s11, 48(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    .cfi_offset ra, -4
-; RV32ZVE32F-NEXT:    .cfi_offset s0, -8
-; RV32ZVE32F-NEXT:    .cfi_offset s2, -12
-; RV32ZVE32F-NEXT:    .cfi_offset s3, -16
-; RV32ZVE32F-NEXT:    .cfi_offset s4, -20
-; RV32ZVE32F-NEXT:    .cfi_offset s5, -24
-; RV32ZVE32F-NEXT:    .cfi_offset s6, -28
-; RV32ZVE32F-NEXT:    .cfi_offset s7, -32
-; RV32ZVE32F-NEXT:    .cfi_offset s8, -36
-; RV32ZVE32F-NEXT:    .cfi_offset s9, -40
-; RV32ZVE32F-NEXT:    .cfi_offset s10, -44
-; RV32ZVE32F-NEXT:    .cfi_offset s11, -48
-; RV32ZVE32F-NEXT:    addi s0, sp, 96
-; RV32ZVE32F-NEXT:    .cfi_def_cfa s0, 0
-; RV32ZVE32F-NEXT:    andi sp, sp, -32
-; RV32ZVE32F-NEXT:    lw a4, 60(a3)
-; RV32ZVE32F-NEXT:    lw a5, 56(a3)
-; RV32ZVE32F-NEXT:    lw a6, 52(a3)
-; RV32ZVE32F-NEXT:    lw a7, 48(a3)
-; RV32ZVE32F-NEXT:    lw t0, 44(a3)
-; RV32ZVE32F-NEXT:    lw t1, 40(a3)
-; RV32ZVE32F-NEXT:    lw t2, 36(a3)
-; RV32ZVE32F-NEXT:    lw t3, 32(a3)
-; RV32ZVE32F-NEXT:    lw t4, 28(a3)
-; RV32ZVE32F-NEXT:    lw t5, 24(a3)
-; RV32ZVE32F-NEXT:    lw t6, 20(a3)
-; RV32ZVE32F-NEXT:    lw s2, 16(a3)
-; RV32ZVE32F-NEXT:    lw s3, 12(a3)
-; RV32ZVE32F-NEXT:    lw s5, 8(a3)
-; RV32ZVE32F-NEXT:    lw s4, 4(a3)
-; RV32ZVE32F-NEXT:    lw a3, 0(a3)
-; RV32ZVE32F-NEXT:    lw s6, 0(a2)
-; RV32ZVE32F-NEXT:    lw s7, 8(a2)
-; RV32ZVE32F-NEXT:    lw s8, 16(a2)
-; RV32ZVE32F-NEXT:    lw s9, 24(a2)
-; RV32ZVE32F-NEXT:    lw s10, 56(a2)
-; RV32ZVE32F-NEXT:    lw s11, 48(a2)
-; RV32ZVE32F-NEXT:    lw ra, 40(a2)
-; RV32ZVE32F-NEXT:    lw a2, 32(a2)
-; RV32ZVE32F-NEXT:    sw s10, 28(sp)
-; RV32ZVE32F-NEXT:    sw s11, 24(sp)
-; RV32ZVE32F-NEXT:    sw ra, 20(sp)
-; RV32ZVE32F-NEXT:    sw a2, 16(sp)
-; RV32ZVE32F-NEXT:    sw s9, 12(sp)
-; RV32ZVE32F-NEXT:    sw s8, 8(sp)
-; RV32ZVE32F-NEXT:    sw s7, 4(sp)
-; RV32ZVE32F-NEXT:    sw s6, 0(sp)
-; RV32ZVE32F-NEXT:    mv a2, sp
+; RV32ZVE32F-NEXT:    addi sp, sp, -16
+; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
+; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
+; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
+; RV32ZVE32F-NEXT:    lw a4, 56(a2)
+; RV32ZVE32F-NEXT:    lw a5, 48(a2)
+; RV32ZVE32F-NEXT:    lw a6, 40(a2)
+; RV32ZVE32F-NEXT:    lw a7, 32(a2)
+; RV32ZVE32F-NEXT:    lw t0, 24(a2)
+; RV32ZVE32F-NEXT:    lw t1, 0(a2)
+; RV32ZVE32F-NEXT:    lw t2, 8(a2)
+; RV32ZVE32F-NEXT:    lw a2, 16(a2)
 ; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT:    vle32.v v8, (a2)
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t1
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t2
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t0
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a7
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a6
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a5
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a4
 ; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
 ; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
 ; RV32ZVE32F-NEXT:    vsetivli zero, 0, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT:    vmv.x.s a1, v0
-; RV32ZVE32F-NEXT:    andi a2, a1, 1
-; RV32ZVE32F-NEXT:    bnez a2, .LBB57_10
-; RV32ZVE32F-NEXT:  # %bb.1: # %else
-; RV32ZVE32F-NEXT:    andi a2, a1, 2
-; RV32ZVE32F-NEXT:    bnez a2, .LBB57_11
-; RV32ZVE32F-NEXT:  .LBB57_2: # %else2
-; RV32ZVE32F-NEXT:    andi a2, a1, 4
-; RV32ZVE32F-NEXT:    bnez a2, .LBB57_12
-; RV32ZVE32F-NEXT:  .LBB57_3: # %else5
-; RV32ZVE32F-NEXT:    andi a2, a1, 8
-; RV32ZVE32F-NEXT:    bnez a2, .LBB57_13
-; RV32ZVE32F-NEXT:  .LBB57_4: # %else8
-; RV32ZVE32F-NEXT:    andi a2, a1, 16
-; RV32ZVE32F-NEXT:    bnez a2, .LBB57_14
-; RV32ZVE32F-NEXT:  .LBB57_5: # %else11
-; RV32ZVE32F-NEXT:    andi a2, a1, 32
-; RV32ZVE32F-NEXT:    bnez a2, .LBB57_15
-; RV32ZVE32F-NEXT:  .LBB57_6: # %else14
-; RV32ZVE32F-NEXT:    andi a2, a1, 64
-; RV32ZVE32F-NEXT:    bnez a2, .LBB57_16
-; RV32ZVE32F-NEXT:  .LBB57_7: # %else17
-; RV32ZVE32F-NEXT:    andi a1, a1, -128
+; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
+; RV32ZVE32F-NEXT:    andi a1, t0, 1
 ; RV32ZVE32F-NEXT:    beqz a1, .LBB57_9
-; RV32ZVE32F-NEXT:  .LBB57_8: # %cond.load19
-; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
-; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
-; RV32ZVE32F-NEXT:    lw a4, 4(a1)
-; RV32ZVE32F-NEXT:    lw a5, 0(a1)
-; RV32ZVE32F-NEXT:  .LBB57_9: # %else20
-; RV32ZVE32F-NEXT:    sw a3, 0(a0)
-; RV32ZVE32F-NEXT:    sw s4, 4(a0)
-; RV32ZVE32F-NEXT:    sw s5, 8(a0)
-; RV32ZVE32F-NEXT:    sw s3, 12(a0)
-; RV32ZVE32F-NEXT:    sw s2, 16(a0)
-; RV32ZVE32F-NEXT:    sw t6, 20(a0)
-; RV32ZVE32F-NEXT:    sw t5, 24(a0)
-; RV32ZVE32F-NEXT:    sw t4, 28(a0)
-; RV32ZVE32F-NEXT:    sw t3, 32(a0)
-; RV32ZVE32F-NEXT:    sw t2, 36(a0)
-; RV32ZVE32F-NEXT:    sw t1, 40(a0)
-; RV32ZVE32F-NEXT:    sw t0, 44(a0)
-; RV32ZVE32F-NEXT:    sw a7, 48(a0)
-; RV32ZVE32F-NEXT:    sw a6, 52(a0)
-; RV32ZVE32F-NEXT:    sw a5, 56(a0)
-; RV32ZVE32F-NEXT:    sw a4, 60(a0)
-; RV32ZVE32F-NEXT:    addi sp, s0, -96
-; RV32ZVE32F-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s2, 84(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s3, 80(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s4, 76(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s5, 72(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s6, 68(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s7, 64(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s8, 60(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s9, 56(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s10, 52(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s11, 48(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    addi sp, sp, 96
-; RV32ZVE32F-NEXT:    ret
-; RV32ZVE32F-NEXT:  .LBB57_10: # %cond.load
+; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV32ZVE32F-NEXT:    vsetivli zero, 0, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
-; RV32ZVE32F-NEXT:    lw s4, 4(a2)
-; RV32ZVE32F-NEXT:    lw a3, 0(a2)
-; RV32ZVE32F-NEXT:    andi a2, a1, 2
-; RV32ZVE32F-NEXT:    beqz a2, .LBB57_2
-; RV32ZVE32F-NEXT:  .LBB57_11: # %cond.load1
+; RV32ZVE32F-NEXT:    lw a1, 4(a2)
+; RV32ZVE32F-NEXT:    lw a2, 0(a2)
+; RV32ZVE32F-NEXT:    andi a4, t0, 2
+; RV32ZVE32F-NEXT:    bnez a4, .LBB57_10
+; RV32ZVE32F-NEXT:  .LBB57_2:
+; RV32ZVE32F-NEXT:    lw a4, 12(a3)
+; RV32ZVE32F-NEXT:    lw a5, 8(a3)
+; RV32ZVE32F-NEXT:    andi a6, t0, 4
+; RV32ZVE32F-NEXT:    bnez a6, .LBB57_11
+; RV32ZVE32F-NEXT:  .LBB57_3:
+; RV32ZVE32F-NEXT:    lw a6, 20(a3)
+; RV32ZVE32F-NEXT:    lw a7, 16(a3)
+; RV32ZVE32F-NEXT:    andi t1, t0, 8
+; RV32ZVE32F-NEXT:    bnez t1, .LBB57_12
+; RV32ZVE32F-NEXT:  .LBB57_4:
+; RV32ZVE32F-NEXT:    lw t1, 28(a3)
+; RV32ZVE32F-NEXT:    lw t2, 24(a3)
+; RV32ZVE32F-NEXT:    andi t3, t0, 16
+; RV32ZVE32F-NEXT:    bnez t3, .LBB57_13
+; RV32ZVE32F-NEXT:  .LBB57_5:
+; RV32ZVE32F-NEXT:    lw t3, 36(a3)
+; RV32ZVE32F-NEXT:    lw t4, 32(a3)
+; RV32ZVE32F-NEXT:    andi t5, t0, 32
+; RV32ZVE32F-NEXT:    bnez t5, .LBB57_14
+; RV32ZVE32F-NEXT:  .LBB57_6:
+; RV32ZVE32F-NEXT:    lw t5, 44(a3)
+; RV32ZVE32F-NEXT:    lw t6, 40(a3)
+; RV32ZVE32F-NEXT:    andi s0, t0, 64
+; RV32ZVE32F-NEXT:    bnez s0, .LBB57_15
+; RV32ZVE32F-NEXT:  .LBB57_7:
+; RV32ZVE32F-NEXT:    lw s0, 52(a3)
+; RV32ZVE32F-NEXT:    lw s1, 48(a3)
+; RV32ZVE32F-NEXT:    andi t0, t0, -128
+; RV32ZVE32F-NEXT:    bnez t0, .LBB57_16
+; RV32ZVE32F-NEXT:  .LBB57_8:
+; RV32ZVE32F-NEXT:    lw t0, 60(a3)
+; RV32ZVE32F-NEXT:    lw a3, 56(a3)
+; RV32ZVE32F-NEXT:    j .LBB57_17
+; RV32ZVE32F-NEXT:  .LBB57_9:
+; RV32ZVE32F-NEXT:    lw a1, 4(a3)
+; RV32ZVE32F-NEXT:    lw a2, 0(a3)
+; RV32ZVE32F-NEXT:    andi a4, t0, 2
+; RV32ZVE32F-NEXT:    beqz a4, .LBB57_2
+; RV32ZVE32F-NEXT:  .LBB57_10: # %cond.load1
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
-; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
-; RV32ZVE32F-NEXT:    lw s3, 4(a2)
-; RV32ZVE32F-NEXT:    lw s5, 0(a2)
-; RV32ZVE32F-NEXT:    andi a2, a1, 4
-; RV32ZVE32F-NEXT:    beqz a2, .LBB57_3
-; RV32ZVE32F-NEXT:  .LBB57_12: # %cond.load4
+; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
+; RV32ZVE32F-NEXT:    lw a4, 4(a5)
+; RV32ZVE32F-NEXT:    lw a5, 0(a5)
+; RV32ZVE32F-NEXT:    andi a6, t0, 4
+; RV32ZVE32F-NEXT:    beqz a6, .LBB57_3
+; RV32ZVE32F-NEXT:  .LBB57_11: # %cond.load4
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
-; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
-; RV32ZVE32F-NEXT:    lw t6, 4(a2)
-; RV32ZVE32F-NEXT:    lw s2, 0(a2)
-; RV32ZVE32F-NEXT:    andi a2, a1, 8
-; RV32ZVE32F-NEXT:    beqz a2, .LBB57_4
-; RV32ZVE32F-NEXT:  .LBB57_13: # %cond.load7
+; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
+; RV32ZVE32F-NEXT:    lw a6, 4(a7)
+; RV32ZVE32F-NEXT:    lw a7, 0(a7)
+; RV32ZVE32F-NEXT:    andi t1, t0, 8
+; RV32ZVE32F-NEXT:    beqz t1, .LBB57_4
+; RV32ZVE32F-NEXT:  .LBB57_12: # %cond.load7
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
-; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
-; RV32ZVE32F-NEXT:    lw t4, 4(a2)
-; RV32ZVE32F-NEXT:    lw t5, 0(a2)
-; RV32ZVE32F-NEXT:    andi a2, a1, 16
-; RV32ZVE32F-NEXT:    beqz a2, .LBB57_5
-; RV32ZVE32F-NEXT:  .LBB57_14: # %cond.load10
+; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
+; RV32ZVE32F-NEXT:    lw t1, 4(t2)
+; RV32ZVE32F-NEXT:    lw t2, 0(t2)
+; RV32ZVE32F-NEXT:    andi t3, t0, 16
+; RV32ZVE32F-NEXT:    beqz t3, .LBB57_5
+; RV32ZVE32F-NEXT:  .LBB57_13: # %cond.load10
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
-; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
-; RV32ZVE32F-NEXT:    lw t2, 4(a2)
-; RV32ZVE32F-NEXT:    lw t3, 0(a2)
-; RV32ZVE32F-NEXT:    andi a2, a1, 32
-; RV32ZVE32F-NEXT:    beqz a2, .LBB57_6
-; RV32ZVE32F-NEXT:  .LBB57_15: # %cond.load13
+; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
+; RV32ZVE32F-NEXT:    lw t3, 4(t4)
+; RV32ZVE32F-NEXT:    lw t4, 0(t4)
+; RV32ZVE32F-NEXT:    andi t5, t0, 32
+; RV32ZVE32F-NEXT:    beqz t5, .LBB57_6
+; RV32ZVE32F-NEXT:  .LBB57_14: # %cond.load13
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
-; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
-; RV32ZVE32F-NEXT:    lw t0, 4(a2)
-; RV32ZVE32F-NEXT:    lw t1, 0(a2)
-; RV32ZVE32F-NEXT:    andi a2, a1, 64
-; RV32ZVE32F-NEXT:    beqz a2, .LBB57_7
-; RV32ZVE32F-NEXT:  .LBB57_16: # %cond.load16
+; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
+; RV32ZVE32F-NEXT:    lw t5, 4(t6)
+; RV32ZVE32F-NEXT:    lw t6, 0(t6)
+; RV32ZVE32F-NEXT:    andi s0, t0, 64
+; RV32ZVE32F-NEXT:    beqz s0, .LBB57_7
+; RV32ZVE32F-NEXT:  .LBB57_15: # %cond.load16
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
-; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
-; RV32ZVE32F-NEXT:    lw a6, 4(a2)
-; RV32ZVE32F-NEXT:    lw a7, 0(a2)
-; RV32ZVE32F-NEXT:    andi a1, a1, -128
-; RV32ZVE32F-NEXT:    bnez a1, .LBB57_8
-; RV32ZVE32F-NEXT:    j .LBB57_9
+; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
+; RV32ZVE32F-NEXT:    lw s0, 4(s1)
+; RV32ZVE32F-NEXT:    lw s1, 0(s1)
+; RV32ZVE32F-NEXT:    andi t0, t0, -128
+; RV32ZVE32F-NEXT:    beqz t0, .LBB57_8
+; RV32ZVE32F-NEXT:  .LBB57_16: # %cond.load19
+; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
+; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
+; RV32ZVE32F-NEXT:    lw t0, 4(a3)
+; RV32ZVE32F-NEXT:    lw a3, 0(a3)
+; RV32ZVE32F-NEXT:  .LBB57_17: # %else20
+; RV32ZVE32F-NEXT:    sw a2, 0(a0)
+; RV32ZVE32F-NEXT:    sw a1, 4(a0)
+; RV32ZVE32F-NEXT:    sw a5, 8(a0)
+; RV32ZVE32F-NEXT:    sw a4, 12(a0)
+; RV32ZVE32F-NEXT:    sw a7, 16(a0)
+; RV32ZVE32F-NEXT:    sw a6, 20(a0)
+; RV32ZVE32F-NEXT:    sw t2, 24(a0)
+; RV32ZVE32F-NEXT:    sw t1, 28(a0)
+; RV32ZVE32F-NEXT:    sw t4, 32(a0)
+; RV32ZVE32F-NEXT:    sw t3, 36(a0)
+; RV32ZVE32F-NEXT:    sw t6, 40(a0)
+; RV32ZVE32F-NEXT:    sw t5, 44(a0)
+; RV32ZVE32F-NEXT:    sw s1, 48(a0)
+; RV32ZVE32F-NEXT:    sw s0, 52(a0)
+; RV32ZVE32F-NEXT:    sw a3, 56(a0)
+; RV32ZVE32F-NEXT:    sw t0, 60(a0)
+; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    addi sp, sp, 16
+; RV32ZVE32F-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i64:
 ; RV64ZVE32F:       # %bb.0:
@@ -11974,34 +11939,23 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
 ;
 ; RV32ZVE32F-LABEL: mgather_baseidx_v8f64:
 ; RV32ZVE32F:       # %bb.0:
-; RV32ZVE32F-NEXT:    addi sp, sp, -64
-; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 64
-; RV32ZVE32F-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    .cfi_offset ra, -4
-; RV32ZVE32F-NEXT:    .cfi_offset s0, -8
-; RV32ZVE32F-NEXT:    addi s0, sp, 64
-; RV32ZVE32F-NEXT:    .cfi_def_cfa s0, 0
-; RV32ZVE32F-NEXT:    andi sp, sp, -32
-; RV32ZVE32F-NEXT:    lw a3, 0(a2)
-; RV32ZVE32F-NEXT:    lw a4, 8(a2)
-; RV32ZVE32F-NEXT:    lw a5, 16(a2)
-; RV32ZVE32F-NEXT:    lw a6, 24(a2)
-; RV32ZVE32F-NEXT:    lw a7, 56(a2)
-; RV32ZVE32F-NEXT:    lw t0, 48(a2)
-; RV32ZVE32F-NEXT:    lw t1, 40(a2)
-; RV32ZVE32F-NEXT:    lw a2, 32(a2)
-; RV32ZVE32F-NEXT:    sw a7, 28(sp)
-; RV32ZVE32F-NEXT:    sw t0, 24(sp)
-; RV32ZVE32F-NEXT:    sw t1, 20(sp)
-; RV32ZVE32F-NEXT:    sw a2, 16(sp)
-; RV32ZVE32F-NEXT:    sw a6, 12(sp)
-; RV32ZVE32F-NEXT:    sw a5, 8(sp)
-; RV32ZVE32F-NEXT:    sw a4, 4(sp)
-; RV32ZVE32F-NEXT:    sw a3, 0(sp)
-; RV32ZVE32F-NEXT:    mv a2, sp
+; RV32ZVE32F-NEXT:    lw a3, 56(a2)
+; RV32ZVE32F-NEXT:    lw a4, 48(a2)
+; RV32ZVE32F-NEXT:    lw a5, 40(a2)
+; RV32ZVE32F-NEXT:    lw a6, 32(a2)
+; RV32ZVE32F-NEXT:    lw a7, 24(a2)
+; RV32ZVE32F-NEXT:    lw t0, 0(a2)
+; RV32ZVE32F-NEXT:    lw t1, 8(a2)
+; RV32ZVE32F-NEXT:    lw a2, 16(a2)
 ; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT:    vle32.v v8, (a2)
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t0
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t1
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a7
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a6
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a5
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a4
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
 ; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
 ; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
 ; RV32ZVE32F-NEXT:    vsetivli zero, 0, e8, mf4, ta, ma
@@ -12043,10 +11997,6 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
 ; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
 ; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
 ; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
-; RV32ZVE32F-NEXT:    addi sp, s0, -64
-; RV32ZVE32F-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    addi sp, sp, 64
 ; RV32ZVE32F-NEXT:    ret
 ; RV32ZVE32F-NEXT:  .LBB96_10: # %cond.load
 ; RV32ZVE32F-NEXT:    vsetivli zero, 0, e32, m2, ta, ma

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 31bf057d9e4e..b00f8082b385 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -5684,22 +5684,19 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
 ;
 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64:
 ; RV32ZVE32F:       # %bb.0:
-; RV32ZVE32F-NEXT:    addi sp, sp, -96
-; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 96
-; RV32ZVE32F-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s2, 84(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s3, 80(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s4, 76(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s5, 72(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s6, 68(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s7, 64(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s8, 60(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s9, 56(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s10, 52(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s11, 48(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    .cfi_offset ra, -4
-; RV32ZVE32F-NEXT:    .cfi_offset s0, -8
+; RV32ZVE32F-NEXT:    addi sp, sp, -48
+; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 48
+; RV32ZVE32F-NEXT:    sw s0, 44(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s1, 40(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s2, 36(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s3, 32(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s4, 28(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s6, 20(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s8, 12(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
+; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
 ; RV32ZVE32F-NEXT:    .cfi_offset s2, -12
 ; RV32ZVE32F-NEXT:    .cfi_offset s3, -16
 ; RV32ZVE32F-NEXT:    .cfi_offset s4, -20
@@ -5707,12 +5704,6 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
 ; RV32ZVE32F-NEXT:    .cfi_offset s6, -28
 ; RV32ZVE32F-NEXT:    .cfi_offset s7, -32
 ; RV32ZVE32F-NEXT:    .cfi_offset s8, -36
-; RV32ZVE32F-NEXT:    .cfi_offset s9, -40
-; RV32ZVE32F-NEXT:    .cfi_offset s10, -44
-; RV32ZVE32F-NEXT:    .cfi_offset s11, -48
-; RV32ZVE32F-NEXT:    addi s0, sp, 96
-; RV32ZVE32F-NEXT:    .cfi_def_cfa s0, 0
-; RV32ZVE32F-NEXT:    andi sp, sp, -32
 ; RV32ZVE32F-NEXT:    lw a3, 60(a0)
 ; RV32ZVE32F-NEXT:    lw a4, 56(a0)
 ; RV32ZVE32F-NEXT:    lw a5, 52(a0)
@@ -5725,55 +5716,51 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
 ; RV32ZVE32F-NEXT:    lw t4, 24(a0)
 ; RV32ZVE32F-NEXT:    lw t5, 20(a0)
 ; RV32ZVE32F-NEXT:    lw t6, 16(a0)
-; RV32ZVE32F-NEXT:    lw s3, 12(a0)
-; RV32ZVE32F-NEXT:    lw s2, 8(a0)
-; RV32ZVE32F-NEXT:    lw s5, 4(a0)
-; RV32ZVE32F-NEXT:    lw s4, 0(a0)
-; RV32ZVE32F-NEXT:    lw a0, 0(a2)
-; RV32ZVE32F-NEXT:    lw s6, 8(a2)
-; RV32ZVE32F-NEXT:    lw s7, 16(a2)
-; RV32ZVE32F-NEXT:    lw s8, 24(a2)
-; RV32ZVE32F-NEXT:    lw s9, 56(a2)
-; RV32ZVE32F-NEXT:    lw s10, 48(a2)
-; RV32ZVE32F-NEXT:    lw s11, 40(a2)
-; RV32ZVE32F-NEXT:    lw a2, 32(a2)
-; RV32ZVE32F-NEXT:    sw s9, 28(sp)
-; RV32ZVE32F-NEXT:    sw s10, 24(sp)
-; RV32ZVE32F-NEXT:    sw s11, 20(sp)
-; RV32ZVE32F-NEXT:    sw a2, 16(sp)
-; RV32ZVE32F-NEXT:    sw s8, 12(sp)
-; RV32ZVE32F-NEXT:    sw s7, 8(sp)
-; RV32ZVE32F-NEXT:    sw s6, 4(sp)
-; RV32ZVE32F-NEXT:    sw a0, 0(sp)
-; RV32ZVE32F-NEXT:    mv a0, sp
+; RV32ZVE32F-NEXT:    lw s1, 12(a0)
+; RV32ZVE32F-NEXT:    lw s0, 8(a0)
+; RV32ZVE32F-NEXT:    lw s2, 56(a2)
+; RV32ZVE32F-NEXT:    lw s3, 48(a2)
+; RV32ZVE32F-NEXT:    lw s4, 40(a2)
+; RV32ZVE32F-NEXT:    lw s5, 32(a2)
+; RV32ZVE32F-NEXT:    lw s6, 24(a2)
+; RV32ZVE32F-NEXT:    lw s7, 0(a2)
+; RV32ZVE32F-NEXT:    lw s8, 8(a2)
+; RV32ZVE32F-NEXT:    lw a2, 16(a2)
 ; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT:    vle32.v v8, (a0)
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, s7
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, s8
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, s6
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, s5
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, s4
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, s3
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, s2
 ; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
 ; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
 ; RV32ZVE32F-NEXT:    vsetivli zero, 0, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT:    vmv.x.s a0, v0
-; RV32ZVE32F-NEXT:    andi a1, a0, 1
-; RV32ZVE32F-NEXT:    bnez a1, .LBB51_10
+; RV32ZVE32F-NEXT:    vmv.x.s a1, v0
+; RV32ZVE32F-NEXT:    andi a2, a1, 1
+; RV32ZVE32F-NEXT:    bnez a2, .LBB51_10
 ; RV32ZVE32F-NEXT:  # %bb.1: # %else
-; RV32ZVE32F-NEXT:    andi a1, a0, 2
-; RV32ZVE32F-NEXT:    bnez a1, .LBB51_11
+; RV32ZVE32F-NEXT:    andi a0, a1, 2
+; RV32ZVE32F-NEXT:    bnez a0, .LBB51_11
 ; RV32ZVE32F-NEXT:  .LBB51_2: # %else2
-; RV32ZVE32F-NEXT:    andi a1, a0, 4
-; RV32ZVE32F-NEXT:    bnez a1, .LBB51_12
+; RV32ZVE32F-NEXT:    andi a0, a1, 4
+; RV32ZVE32F-NEXT:    bnez a0, .LBB51_12
 ; RV32ZVE32F-NEXT:  .LBB51_3: # %else4
-; RV32ZVE32F-NEXT:    andi a1, a0, 8
-; RV32ZVE32F-NEXT:    bnez a1, .LBB51_13
+; RV32ZVE32F-NEXT:    andi a0, a1, 8
+; RV32ZVE32F-NEXT:    bnez a0, .LBB51_13
 ; RV32ZVE32F-NEXT:  .LBB51_4: # %else6
-; RV32ZVE32F-NEXT:    andi a1, a0, 16
-; RV32ZVE32F-NEXT:    bnez a1, .LBB51_14
+; RV32ZVE32F-NEXT:    andi a0, a1, 16
+; RV32ZVE32F-NEXT:    bnez a0, .LBB51_14
 ; RV32ZVE32F-NEXT:  .LBB51_5: # %else8
-; RV32ZVE32F-NEXT:    andi a1, a0, 32
-; RV32ZVE32F-NEXT:    bnez a1, .LBB51_15
+; RV32ZVE32F-NEXT:    andi a0, a1, 32
+; RV32ZVE32F-NEXT:    bnez a0, .LBB51_15
 ; RV32ZVE32F-NEXT:  .LBB51_6: # %else10
-; RV32ZVE32F-NEXT:    andi a1, a0, 64
-; RV32ZVE32F-NEXT:    bnez a1, .LBB51_16
+; RV32ZVE32F-NEXT:    andi a0, a1, 64
+; RV32ZVE32F-NEXT:    bnez a0, .LBB51_16
 ; RV32ZVE32F-NEXT:  .LBB51_7: # %else12
-; RV32ZVE32F-NEXT:    andi a0, a0, -128
+; RV32ZVE32F-NEXT:    andi a0, a1, -128
 ; RV32ZVE32F-NEXT:    beqz a0, .LBB51_9
 ; RV32ZVE32F-NEXT:  .LBB51_8: # %cond.store13
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
@@ -5782,75 +5769,73 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
 ; RV32ZVE32F-NEXT:    sw a4, 0(a0)
 ; RV32ZVE32F-NEXT:    sw a3, 4(a0)
 ; RV32ZVE32F-NEXT:  .LBB51_9: # %else14
-; RV32ZVE32F-NEXT:    addi sp, s0, -96
-; RV32ZVE32F-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s2, 84(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s3, 80(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s4, 76(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s5, 72(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s6, 68(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s7, 64(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s8, 60(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s9, 56(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s10, 52(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s11, 48(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    addi sp, sp, 96
+; RV32ZVE32F-NEXT:    lw s0, 44(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s2, 36(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s3, 32(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s5, 24(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s6, 20(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s7, 16(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s8, 12(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    addi sp, sp, 48
 ; RV32ZVE32F-NEXT:    ret
 ; RV32ZVE32F-NEXT:  .LBB51_10: # %cond.store
+; RV32ZVE32F-NEXT:    lw a2, 4(a0)
+; RV32ZVE32F-NEXT:    lw a0, 0(a0)
 ; RV32ZVE32F-NEXT:    vsetivli zero, 0, e32, m2, ta, ma
-; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
-; RV32ZVE32F-NEXT:    sw s5, 4(a1)
-; RV32ZVE32F-NEXT:    sw s4, 0(a1)
-; RV32ZVE32F-NEXT:    andi a1, a0, 2
-; RV32ZVE32F-NEXT:    beqz a1, .LBB51_2
+; RV32ZVE32F-NEXT:    vmv.x.s s2, v8
+; RV32ZVE32F-NEXT:    sw a2, 4(s2)
+; RV32ZVE32F-NEXT:    sw a0, 0(s2)
+; RV32ZVE32F-NEXT:    andi a0, a1, 2
+; RV32ZVE32F-NEXT:    beqz a0, .LBB51_2
 ; RV32ZVE32F-NEXT:  .LBB51_11: # %cond.store1
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
-; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
-; RV32ZVE32F-NEXT:    sw s3, 4(a1)
-; RV32ZVE32F-NEXT:    sw s2, 0(a1)
-; RV32ZVE32F-NEXT:    andi a1, a0, 4
-; RV32ZVE32F-NEXT:    beqz a1, .LBB51_3
+; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
+; RV32ZVE32F-NEXT:    sw s1, 4(a0)
+; RV32ZVE32F-NEXT:    sw s0, 0(a0)
+; RV32ZVE32F-NEXT:    andi a0, a1, 4
+; RV32ZVE32F-NEXT:    beqz a0, .LBB51_3
 ; RV32ZVE32F-NEXT:  .LBB51_12: # %cond.store3
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
-; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
-; RV32ZVE32F-NEXT:    sw t6, 0(a1)
-; RV32ZVE32F-NEXT:    sw t5, 4(a1)
-; RV32ZVE32F-NEXT:    andi a1, a0, 8
-; RV32ZVE32F-NEXT:    beqz a1, .LBB51_4
+; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
+; RV32ZVE32F-NEXT:    sw t6, 0(a0)
+; RV32ZVE32F-NEXT:    sw t5, 4(a0)
+; RV32ZVE32F-NEXT:    andi a0, a1, 8
+; RV32ZVE32F-NEXT:    beqz a0, .LBB51_4
 ; RV32ZVE32F-NEXT:  .LBB51_13: # %cond.store5
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
-; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
-; RV32ZVE32F-NEXT:    sw t4, 0(a1)
-; RV32ZVE32F-NEXT:    sw t3, 4(a1)
-; RV32ZVE32F-NEXT:    andi a1, a0, 16
-; RV32ZVE32F-NEXT:    beqz a1, .LBB51_5
+; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
+; RV32ZVE32F-NEXT:    sw t4, 0(a0)
+; RV32ZVE32F-NEXT:    sw t3, 4(a0)
+; RV32ZVE32F-NEXT:    andi a0, a1, 16
+; RV32ZVE32F-NEXT:    beqz a0, .LBB51_5
 ; RV32ZVE32F-NEXT:  .LBB51_14: # %cond.store7
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
-; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
-; RV32ZVE32F-NEXT:    sw t2, 0(a1)
-; RV32ZVE32F-NEXT:    sw t1, 4(a1)
-; RV32ZVE32F-NEXT:    andi a1, a0, 32
-; RV32ZVE32F-NEXT:    beqz a1, .LBB51_6
+; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
+; RV32ZVE32F-NEXT:    sw t2, 0(a0)
+; RV32ZVE32F-NEXT:    sw t1, 4(a0)
+; RV32ZVE32F-NEXT:    andi a0, a1, 32
+; RV32ZVE32F-NEXT:    beqz a0, .LBB51_6
 ; RV32ZVE32F-NEXT:  .LBB51_15: # %cond.store9
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
-; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
-; RV32ZVE32F-NEXT:    sw t0, 0(a1)
-; RV32ZVE32F-NEXT:    sw a7, 4(a1)
-; RV32ZVE32F-NEXT:    andi a1, a0, 64
-; RV32ZVE32F-NEXT:    beqz a1, .LBB51_7
+; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
+; RV32ZVE32F-NEXT:    sw t0, 0(a0)
+; RV32ZVE32F-NEXT:    sw a7, 4(a0)
+; RV32ZVE32F-NEXT:    andi a0, a1, 64
+; RV32ZVE32F-NEXT:    beqz a0, .LBB51_7
 ; RV32ZVE32F-NEXT:  .LBB51_16: # %cond.store11
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
-; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
-; RV32ZVE32F-NEXT:    sw a6, 0(a1)
-; RV32ZVE32F-NEXT:    sw a5, 4(a1)
-; RV32ZVE32F-NEXT:    andi a0, a0, -128
+; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
+; RV32ZVE32F-NEXT:    sw a6, 0(a0)
+; RV32ZVE32F-NEXT:    sw a5, 4(a0)
+; RV32ZVE32F-NEXT:    andi a0, a1, -128
 ; RV32ZVE32F-NEXT:    bnez a0, .LBB51_8
 ; RV32ZVE32F-NEXT:    j .LBB51_9
 ;
@@ -10381,121 +10366,106 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
 ;
 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64:
 ; RV32ZVE32F:       # %bb.0:
-; RV32ZVE32F-NEXT:    addi sp, sp, -64
-; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 64
-; RV32ZVE32F-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT:    .cfi_offset ra, -4
-; RV32ZVE32F-NEXT:    .cfi_offset s0, -8
-; RV32ZVE32F-NEXT:    addi s0, sp, 64
-; RV32ZVE32F-NEXT:    .cfi_def_cfa s0, 0
-; RV32ZVE32F-NEXT:    andi sp, sp, -32
-; RV32ZVE32F-NEXT:    lw a2, 0(a1)
-; RV32ZVE32F-NEXT:    lw a3, 8(a1)
-; RV32ZVE32F-NEXT:    lw a4, 16(a1)
-; RV32ZVE32F-NEXT:    lw a5, 24(a1)
-; RV32ZVE32F-NEXT:    lw a6, 56(a1)
-; RV32ZVE32F-NEXT:    lw a7, 48(a1)
-; RV32ZVE32F-NEXT:    lw t0, 40(a1)
-; RV32ZVE32F-NEXT:    lw a1, 32(a1)
-; RV32ZVE32F-NEXT:    sw a6, 28(sp)
-; RV32ZVE32F-NEXT:    sw a7, 24(sp)
-; RV32ZVE32F-NEXT:    sw t0, 20(sp)
-; RV32ZVE32F-NEXT:    sw a1, 16(sp)
-; RV32ZVE32F-NEXT:    sw a5, 12(sp)
-; RV32ZVE32F-NEXT:    sw a4, 8(sp)
-; RV32ZVE32F-NEXT:    sw a3, 4(sp)
-; RV32ZVE32F-NEXT:    sw a2, 0(sp)
-; RV32ZVE32F-NEXT:    mv a1, sp
+; RV32ZVE32F-NEXT:    lw a2, 56(a1)
+; RV32ZVE32F-NEXT:    lw a3, 48(a1)
+; RV32ZVE32F-NEXT:    lw a4, 40(a1)
+; RV32ZVE32F-NEXT:    lw a5, 32(a1)
+; RV32ZVE32F-NEXT:    lw a6, 24(a1)
+; RV32ZVE32F-NEXT:    lw a7, 0(a1)
+; RV32ZVE32F-NEXT:    lw t0, 8(a1)
+; RV32ZVE32F-NEXT:    lw a1, 16(a1)
 ; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT:    vle32.v v8, (a1)
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a7
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t0
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a6
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a5
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a4
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
+; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
 ; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a0
 ; RV32ZVE32F-NEXT:    vsetivli zero, 0, e8, mf4, ta, ma
 ; RV32ZVE32F-NEXT:    vmv.x.s a0, v0
 ; RV32ZVE32F-NEXT:    andi a1, a0, 1
-; RV32ZVE32F-NEXT:    bnez a1, .LBB90_10
+; RV32ZVE32F-NEXT:    bnez a1, .LBB90_9
 ; RV32ZVE32F-NEXT:  # %bb.1: # %else
 ; RV32ZVE32F-NEXT:    andi a1, a0, 2
-; RV32ZVE32F-NEXT:    bnez a1, .LBB90_11
+; RV32ZVE32F-NEXT:    bnez a1, .LBB90_10
 ; RV32ZVE32F-NEXT:  .LBB90_2: # %else2
 ; RV32ZVE32F-NEXT:    andi a1, a0, 4
-; RV32ZVE32F-NEXT:    bnez a1, .LBB90_12
+; RV32ZVE32F-NEXT:    bnez a1, .LBB90_11
 ; RV32ZVE32F-NEXT:  .LBB90_3: # %else4
 ; RV32ZVE32F-NEXT:    andi a1, a0, 8
-; RV32ZVE32F-NEXT:    bnez a1, .LBB90_13
+; RV32ZVE32F-NEXT:    bnez a1, .LBB90_12
 ; RV32ZVE32F-NEXT:  .LBB90_4: # %else6
 ; RV32ZVE32F-NEXT:    andi a1, a0, 16
-; RV32ZVE32F-NEXT:    bnez a1, .LBB90_14
+; RV32ZVE32F-NEXT:    bnez a1, .LBB90_13
 ; RV32ZVE32F-NEXT:  .LBB90_5: # %else8
 ; RV32ZVE32F-NEXT:    andi a1, a0, 32
-; RV32ZVE32F-NEXT:    bnez a1, .LBB90_15
+; RV32ZVE32F-NEXT:    bnez a1, .LBB90_14
 ; RV32ZVE32F-NEXT:  .LBB90_6: # %else10
 ; RV32ZVE32F-NEXT:    andi a1, a0, 64
-; RV32ZVE32F-NEXT:    bnez a1, .LBB90_16
+; RV32ZVE32F-NEXT:    bnez a1, .LBB90_15
 ; RV32ZVE32F-NEXT:  .LBB90_7: # %else12
 ; RV32ZVE32F-NEXT:    andi a0, a0, -128
-; RV32ZVE32F-NEXT:    beqz a0, .LBB90_9
-; RV32ZVE32F-NEXT:  .LBB90_8: # %cond.store13
-; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
-; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
-; RV32ZVE32F-NEXT:    fsd fa7, 0(a0)
-; RV32ZVE32F-NEXT:  .LBB90_9: # %else14
-; RV32ZVE32F-NEXT:    addi sp, s0, -64
-; RV32ZVE32F-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT:    addi sp, sp, 64
+; RV32ZVE32F-NEXT:    bnez a0, .LBB90_16
+; RV32ZVE32F-NEXT:  .LBB90_8: # %else14
 ; RV32ZVE32F-NEXT:    ret
-; RV32ZVE32F-NEXT:  .LBB90_10: # %cond.store
+; RV32ZVE32F-NEXT:  .LBB90_9: # %cond.store
 ; RV32ZVE32F-NEXT:    vsetivli zero, 0, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
 ; RV32ZVE32F-NEXT:    fsd fa0, 0(a1)
 ; RV32ZVE32F-NEXT:    andi a1, a0, 2
 ; RV32ZVE32F-NEXT:    beqz a1, .LBB90_2
-; RV32ZVE32F-NEXT:  .LBB90_11: # %cond.store1
+; RV32ZVE32F-NEXT:  .LBB90_10: # %cond.store1
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
 ; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
 ; RV32ZVE32F-NEXT:    fsd fa1, 0(a1)
 ; RV32ZVE32F-NEXT:    andi a1, a0, 4
 ; RV32ZVE32F-NEXT:    beqz a1, .LBB90_3
-; RV32ZVE32F-NEXT:  .LBB90_12: # %cond.store3
+; RV32ZVE32F-NEXT:  .LBB90_11: # %cond.store3
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
 ; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
 ; RV32ZVE32F-NEXT:    fsd fa2, 0(a1)
 ; RV32ZVE32F-NEXT:    andi a1, a0, 8
 ; RV32ZVE32F-NEXT:    beqz a1, .LBB90_4
-; RV32ZVE32F-NEXT:  .LBB90_13: # %cond.store5
+; RV32ZVE32F-NEXT:  .LBB90_12: # %cond.store5
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
 ; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
 ; RV32ZVE32F-NEXT:    fsd fa3, 0(a1)
 ; RV32ZVE32F-NEXT:    andi a1, a0, 16
 ; RV32ZVE32F-NEXT:    beqz a1, .LBB90_5
-; RV32ZVE32F-NEXT:  .LBB90_14: # %cond.store7
+; RV32ZVE32F-NEXT:  .LBB90_13: # %cond.store7
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
 ; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
 ; RV32ZVE32F-NEXT:    fsd fa4, 0(a1)
 ; RV32ZVE32F-NEXT:    andi a1, a0, 32
 ; RV32ZVE32F-NEXT:    beqz a1, .LBB90_6
-; RV32ZVE32F-NEXT:  .LBB90_15: # %cond.store9
+; RV32ZVE32F-NEXT:  .LBB90_14: # %cond.store9
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
 ; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
 ; RV32ZVE32F-NEXT:    fsd fa5, 0(a1)
 ; RV32ZVE32F-NEXT:    andi a1, a0, 64
 ; RV32ZVE32F-NEXT:    beqz a1, .LBB90_7
-; RV32ZVE32F-NEXT:  .LBB90_16: # %cond.store11
+; RV32ZVE32F-NEXT:  .LBB90_15: # %cond.store11
 ; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
 ; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
 ; RV32ZVE32F-NEXT:    fsd fa6, 0(a1)
 ; RV32ZVE32F-NEXT:    andi a0, a0, -128
-; RV32ZVE32F-NEXT:    bnez a0, .LBB90_8
-; RV32ZVE32F-NEXT:    j .LBB90_9
+; RV32ZVE32F-NEXT:    beqz a0, .LBB90_8
+; RV32ZVE32F-NEXT:  .LBB90_16: # %cond.store13
+; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
+; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
+; RV32ZVE32F-NEXT:    fsd fa7, 0(a0)
+; RV32ZVE32F-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f64:
 ; RV64ZVE32F:       # %bb.0:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index 543f35d3ad63..81a3d7141daa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -103,26 +103,23 @@ define void @store_v6i16(ptr %p, <6 x i16> %v) {
 define void @store_v6f16(ptr %p, <6 x half> %v) {
 ; RV32-LABEL: store_v6f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lh a2, 4(a1)
-; RV32-NEXT:    lhu a3, 0(a1)
+; RV32-NEXT:    lh a2, 20(a1)
+; RV32-NEXT:    lhu a3, 16(a1)
 ; RV32-NEXT:    slli a2, a2, 16
 ; RV32-NEXT:    or a2, a3, a2
 ; RV32-NEXT:    lh a3, 12(a1)
 ; RV32-NEXT:    lhu a4, 8(a1)
-; RV32-NEXT:    lh a5, 20(a1)
-; RV32-NEXT:    lhu a1, 16(a1)
+; RV32-NEXT:    lh a5, 4(a1)
+; RV32-NEXT:    lhu a1, 0(a1)
 ; RV32-NEXT:    slli a3, a3, 16
 ; RV32-NEXT:    or a3, a4, a3
 ; RV32-NEXT:    slli a5, a5, 16
 ; RV32-NEXT:    or a1, a1, a5
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    sw a3, 4(sp)
-; RV32-NEXT:    sw a2, 0(sp)
-; RV32-NEXT:    mv a1, sp
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vle32.v v8, (a1)
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a0)
 ; RV32-NEXT:    vslidedown.vi v9, v8, 2
@@ -131,7 +128,6 @@ define void @store_v6f16(ptr %p, <6 x half> %v) {
 ; RV32-NEXT:    vslidedown.vi v8, v8, 1
 ; RV32-NEXT:    addi a0, a0, 4
 ; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: store_v6f16:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index 1b3216e15c9c..acb8477fa864 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -5,29 +5,27 @@
 define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vv_v6i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    lbu a2, 0(a2)
 ; RV32-NEXT:    vle32.v v8, (a1)
-; RV32-NEXT:    srli a1, a2, 5
-; RV32-NEXT:    sb a1, 13(sp)
 ; RV32-NEXT:    andi a1, a2, 1
-; RV32-NEXT:    sb a1, 8(sp)
-; RV32-NEXT:    slli a1, a2, 27
+; RV32-NEXT:    vslide1down.vx v10, v8, a1
+; RV32-NEXT:    slli a1, a2, 30
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 29
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 12(sp)
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    slli a1, a2, 28
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 11(sp)
-; RV32-NEXT:    slli a1, a2, 29
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 27
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 10(sp)
-; RV32-NEXT:    slli a2, a2, 30
-; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 9(sp)
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vle8.v v10, (a1)
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    srli a2, a2, 5
+; RV32-NEXT:    vslide1down.vx v10, v10, a2
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
@@ -39,34 +37,31 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vse32.v v10, (a0)
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a3)
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vv_v6i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    lbu a2, 0(a2)
 ; RV64-NEXT:    vle32.v v8, (a1)
-; RV64-NEXT:    srli a1, a2, 5
-; RV64-NEXT:    sb a1, 13(sp)
 ; RV64-NEXT:    andi a1, a2, 1
-; RV64-NEXT:    sb a1, 8(sp)
-; RV64-NEXT:    slli a1, a2, 59
+; RV64-NEXT:    vslide1down.vx v10, v8, a1
+; RV64-NEXT:    slli a1, a2, 62
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 61
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 12(sp)
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
 ; RV64-NEXT:    slli a1, a2, 60
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 11(sp)
-; RV64-NEXT:    slli a1, a2, 61
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 59
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 10(sp)
-; RV64-NEXT:    slli a2, a2, 62
-; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 9(sp)
-; RV64-NEXT:    addi a1, sp, 8
-; RV64-NEXT:    vle8.v v10, (a1)
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    srli a2, a2, 5
+; RV64-NEXT:    vslide1down.vx v10, v10, a2
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
@@ -77,7 +72,6 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vse64.v v10, (a0)
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a3)
-; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %va = load <6 x i32>, ptr %a
   %vb = load <6 x i32>, ptr %b
@@ -90,29 +84,27 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vx_v6i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    lbu a2, 0(a2)
 ; RV32-NEXT:    vle32.v v8, (a1)
-; RV32-NEXT:    srli a1, a2, 5
-; RV32-NEXT:    sb a1, 13(sp)
 ; RV32-NEXT:    andi a1, a2, 1
-; RV32-NEXT:    sb a1, 8(sp)
-; RV32-NEXT:    slli a1, a2, 27
+; RV32-NEXT:    vslide1down.vx v10, v8, a1
+; RV32-NEXT:    slli a1, a2, 30
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 29
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 12(sp)
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    slli a1, a2, 28
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 11(sp)
-; RV32-NEXT:    slli a1, a2, 29
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 27
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 10(sp)
-; RV32-NEXT:    slli a2, a2, 30
-; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 9(sp)
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vle8.v v10, (a1)
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    srli a2, a2, 5
+; RV32-NEXT:    vslide1down.vx v10, v10, a2
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -124,34 +116,31 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vse32.v v10, (a0)
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a3)
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vx_v6i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    lbu a2, 0(a2)
 ; RV64-NEXT:    vle32.v v8, (a1)
-; RV64-NEXT:    srli a1, a2, 5
-; RV64-NEXT:    sb a1, 13(sp)
 ; RV64-NEXT:    andi a1, a2, 1
-; RV64-NEXT:    sb a1, 8(sp)
-; RV64-NEXT:    slli a1, a2, 59
+; RV64-NEXT:    vslide1down.vx v10, v8, a1
+; RV64-NEXT:    slli a1, a2, 62
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 12(sp)
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 61
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
 ; RV64-NEXT:    slli a1, a2, 60
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 11(sp)
-; RV64-NEXT:    slli a1, a2, 61
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 59
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 10(sp)
-; RV64-NEXT:    slli a2, a2, 62
-; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 9(sp)
-; RV64-NEXT:    addi a1, sp, 8
-; RV64-NEXT:    vle8.v v10, (a1)
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    srli a2, a2, 5
+; RV64-NEXT:    vslide1down.vx v10, v10, a2
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -162,7 +151,6 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vse64.v v10, (a0)
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a3)
-; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %vb = load <6 x i32>, ptr %b
   %ahead = insertelement <6 x i32> poison, i32 %a, i32 0
@@ -176,29 +164,27 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vi_v6i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    lbu a1, 0(a1)
 ; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    srli a0, a1, 5
-; RV32-NEXT:    sb a0, 13(sp)
 ; RV32-NEXT:    andi a0, a1, 1
-; RV32-NEXT:    sb a0, 8(sp)
-; RV32-NEXT:    slli a0, a1, 27
+; RV32-NEXT:    vslide1down.vx v10, v8, a0
+; RV32-NEXT:    slli a0, a1, 30
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    slli a0, a1, 29
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    slli a0, a1, 28
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 11(sp)
-; RV32-NEXT:    slli a0, a1, 29
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    slli a0, a1, 27
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 10(sp)
-; RV32-NEXT:    slli a1, a1, 30
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 9(sp)
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vle8.v v10, (a0)
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    srli a1, a1, 5
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -210,34 +196,31 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vse32.v v10, (a0)
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a2)
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vi_v6i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    lbu a1, 0(a1)
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    srli a0, a1, 5
-; RV64-NEXT:    sb a0, 13(sp)
 ; RV64-NEXT:    andi a0, a1, 1
-; RV64-NEXT:    sb a0, 8(sp)
-; RV64-NEXT:    slli a0, a1, 59
+; RV64-NEXT:    vslide1down.vx v10, v8, a0
+; RV64-NEXT:    slli a0, a1, 62
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    slli a0, a1, 61
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 12(sp)
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    slli a0, a1, 60
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 11(sp)
-; RV64-NEXT:    slli a0, a1, 61
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    slli a0, a1, 59
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 10(sp)
-; RV64-NEXT:    slli a1, a1, 62
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 9(sp)
-; RV64-NEXT:    addi a0, sp, 8
-; RV64-NEXT:    vle8.v v10, (a0)
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    srli a1, a1, 5
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -248,7 +231,6 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vse64.v v10, (a0)
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a2)
-; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %vb = load <6 x i32>, ptr %b
   %a = insertelement <6 x i32> poison, i32 -1, i32 0
@@ -263,29 +245,27 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vv_v6f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    lbu a2, 0(a2)
 ; RV32-NEXT:    vle32.v v8, (a1)
-; RV32-NEXT:    srli a1, a2, 5
-; RV32-NEXT:    sb a1, 13(sp)
 ; RV32-NEXT:    andi a1, a2, 1
-; RV32-NEXT:    sb a1, 8(sp)
-; RV32-NEXT:    slli a1, a2, 27
+; RV32-NEXT:    vslide1down.vx v10, v8, a1
+; RV32-NEXT:    slli a1, a2, 30
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 12(sp)
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 29
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    slli a1, a2, 28
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 11(sp)
-; RV32-NEXT:    slli a1, a2, 29
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 27
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 10(sp)
-; RV32-NEXT:    slli a2, a2, 30
-; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 9(sp)
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vle8.v v10, (a1)
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    srli a2, a2, 5
+; RV32-NEXT:    vslide1down.vx v10, v10, a2
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
@@ -297,34 +277,31 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vse32.v v10, (a0)
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a3)
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vv_v6f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    lbu a2, 0(a2)
 ; RV64-NEXT:    vle32.v v8, (a1)
-; RV64-NEXT:    srli a1, a2, 5
-; RV64-NEXT:    sb a1, 13(sp)
 ; RV64-NEXT:    andi a1, a2, 1
-; RV64-NEXT:    sb a1, 8(sp)
-; RV64-NEXT:    slli a1, a2, 59
+; RV64-NEXT:    vslide1down.vx v10, v8, a1
+; RV64-NEXT:    slli a1, a2, 62
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 12(sp)
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 61
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
 ; RV64-NEXT:    slli a1, a2, 60
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 11(sp)
-; RV64-NEXT:    slli a1, a2, 61
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 59
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 10(sp)
-; RV64-NEXT:    slli a2, a2, 62
-; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 9(sp)
-; RV64-NEXT:    addi a1, sp, 8
-; RV64-NEXT:    vle8.v v10, (a1)
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    srli a2, a2, 5
+; RV64-NEXT:    vslide1down.vx v10, v10, a2
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
@@ -335,7 +312,6 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vse64.v v10, (a0)
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a3)
-; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %va = load <6 x float>, ptr %a
   %vb = load <6 x float>, ptr %b
@@ -348,29 +324,27 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vx_v6f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    lbu a1, 0(a1)
 ; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    srli a0, a1, 5
-; RV32-NEXT:    sb a0, 13(sp)
 ; RV32-NEXT:    andi a0, a1, 1
-; RV32-NEXT:    sb a0, 8(sp)
-; RV32-NEXT:    slli a0, a1, 27
+; RV32-NEXT:    vslide1down.vx v10, v8, a0
+; RV32-NEXT:    slli a0, a1, 30
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    slli a0, a1, 29
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    slli a0, a1, 28
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 11(sp)
-; RV32-NEXT:    slli a0, a1, 29
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    slli a0, a1, 27
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 10(sp)
-; RV32-NEXT:    slli a1, a1, 30
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 9(sp)
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vle8.v v10, (a0)
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    srli a1, a1, 5
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -382,34 +356,31 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vse32.v v10, (a0)
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a2)
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vx_v6f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    lbu a1, 0(a1)
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    srli a0, a1, 5
-; RV64-NEXT:    sb a0, 13(sp)
 ; RV64-NEXT:    andi a0, a1, 1
-; RV64-NEXT:    sb a0, 8(sp)
-; RV64-NEXT:    slli a0, a1, 59
+; RV64-NEXT:    vslide1down.vx v10, v8, a0
+; RV64-NEXT:    slli a0, a1, 62
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 12(sp)
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    slli a0, a1, 61
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    slli a0, a1, 60
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 11(sp)
-; RV64-NEXT:    slli a0, a1, 61
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    slli a0, a1, 59
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 10(sp)
-; RV64-NEXT:    slli a1, a1, 62
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 9(sp)
-; RV64-NEXT:    addi a0, sp, 8
-; RV64-NEXT:    vle8.v v10, (a0)
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    srli a1, a1, 5
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -420,7 +391,6 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vse64.v v10, (a0)
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a2)
-; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %vb = load <6 x float>, ptr %b
   %ahead = insertelement <6 x float> poison, float %a, i32 0
@@ -434,29 +404,27 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vfpzero_v6f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    lbu a1, 0(a1)
 ; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    srli a0, a1, 5
-; RV32-NEXT:    sb a0, 13(sp)
 ; RV32-NEXT:    andi a0, a1, 1
-; RV32-NEXT:    sb a0, 8(sp)
-; RV32-NEXT:    slli a0, a1, 27
+; RV32-NEXT:    vslide1down.vx v10, v8, a0
+; RV32-NEXT:    slli a0, a1, 30
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    slli a0, a1, 29
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    slli a0, a1, 28
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 11(sp)
-; RV32-NEXT:    slli a0, a1, 29
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    slli a0, a1, 27
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 10(sp)
-; RV32-NEXT:    slli a1, a1, 30
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 9(sp)
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vle8.v v10, (a0)
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    srli a1, a1, 5
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -468,34 +436,31 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vse32.v v10, (a0)
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a2)
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vfpzero_v6f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    lbu a1, 0(a1)
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    srli a0, a1, 5
-; RV64-NEXT:    sb a0, 13(sp)
 ; RV64-NEXT:    andi a0, a1, 1
-; RV64-NEXT:    sb a0, 8(sp)
-; RV64-NEXT:    slli a0, a1, 59
+; RV64-NEXT:    vslide1down.vx v10, v8, a0
+; RV64-NEXT:    slli a0, a1, 62
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    slli a0, a1, 61
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 12(sp)
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    slli a0, a1, 60
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 11(sp)
-; RV64-NEXT:    slli a0, a1, 61
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    slli a0, a1, 59
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 10(sp)
-; RV64-NEXT:    slli a1, a1, 62
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 9(sp)
-; RV64-NEXT:    addi a0, sp, 8
-; RV64-NEXT:    vle8.v v10, (a0)
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    srli a1, a1, 5
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -506,7 +471,6 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vse64.v v10, (a0)
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a2)
-; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %vb = load <6 x float>, ptr %b
   %a = insertelement <6 x float> poison, float 0.0, i32 0

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index 1c8463978cf2..67eb190e8cb3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -7,242 +7,230 @@
 define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) {
 ; RV32-LABEL: vector_deinterleave_v16i1_v32i1:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV32-NEXT:    vfirst.m a0, v0
 ; RV32-NEXT:    seqz a0, a0
-; RV32-NEXT:    sb a0, 16(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV32-NEXT:    vmv.x.s a0, v0
-; RV32-NEXT:    slli a1, a0, 17
+; RV32-NEXT:    slli a1, a0, 29
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 23(sp)
-; RV32-NEXT:    slli a1, a0, 19
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    slli a1, a0, 27
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 22(sp)
-; RV32-NEXT:    slli a1, a0, 21
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    slli a1, a0, 25
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 21(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-NEXT:    slli a1, a0, 23
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 20(sp)
-; RV32-NEXT:    slli a1, a0, 25
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    slli a1, a0, 21
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 19(sp)
-; RV32-NEXT:    slli a1, a0, 27
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    slli a1, a0, 19
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 18(sp)
-; RV32-NEXT:    slli a1, a0, 29
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    slli a1, a0, 17
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 17(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v0, 2
+; RV32-NEXT:    vslidedown.vi v9, v0, 2
 ; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vfirst.m a1, v8
+; RV32-NEXT:    vfirst.m a1, v9
 ; RV32-NEXT:    seqz a1, a1
-; RV32-NEXT:    sb a1, 24(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.x.s a1, v8
-; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    vmv.x.s a1, v9
+; RV32-NEXT:    slli a2, a1, 29
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 31(sp)
-; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 27
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 30(sp)
-; RV32-NEXT:    slli a2, a1, 21
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 25
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 29(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32-NEXT:    slli a2, a1, 23
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 28(sp)
-; RV32-NEXT:    slli a2, a1, 25
-; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 27(sp)
-; RV32-NEXT:    slli a2, a1, 27
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 21
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 26(sp)
-; RV32-NEXT:    slli a2, a1, 29
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 19
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 25(sp)
-; RV32-NEXT:    slli a2, a0, 16
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 17
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 7(sp)
-; RV32-NEXT:    slli a2, a0, 18
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vand.vi v8, v8, 1
+; RV32-NEXT:    vmsne.vi v0, v8, 0
+; RV32-NEXT:    slli a2, a0, 30
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 6(sp)
-; RV32-NEXT:    slli a2, a0, 20
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 28
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 5(sp)
-; RV32-NEXT:    slli a2, a0, 22
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 26
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 4(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32-NEXT:    slli a2, a0, 24
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 3(sp)
-; RV32-NEXT:    slli a2, a0, 26
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 22
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 2(sp)
-; RV32-NEXT:    slli a2, a0, 28
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 20
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 1(sp)
-; RV32-NEXT:    slli a0, a0, 30
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 18
+; RV32-NEXT:    srli a2, a2, 31
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a0, a0, 16
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 0(sp)
-; RV32-NEXT:    slli a0, a1, 16
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    slli a0, a1, 30
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 15(sp)
-; RV32-NEXT:    slli a0, a1, 18
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 14(sp)
-; RV32-NEXT:    slli a0, a1, 20
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    slli a0, a1, 28
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 13(sp)
-; RV32-NEXT:    slli a0, a1, 22
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    slli a0, a1, 26
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-NEXT:    slli a0, a1, 24
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 11(sp)
-; RV32-NEXT:    slli a0, a1, 26
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    slli a0, a1, 22
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 10(sp)
-; RV32-NEXT:    slli a0, a1, 28
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    slli a0, a1, 20
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    slli a0, a1, 18
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 9(sp)
-; RV32-NEXT:    slli a1, a1, 30
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    slli a1, a1, 16
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 8(sp)
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    mv a0, sp
-; RV32-NEXT:    vle8.v v9, (a0)
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-NEXT:    vand.vi v8, v8, 1
-; RV32-NEXT:    vmsne.vi v0, v8, 0
-; RV32-NEXT:    vand.vi v8, v9, 1
 ; RV32-NEXT:    vmsne.vi v8, v8, 0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vector_deinterleave_v16i1_v32i1:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -32
-; RV64-NEXT:    .cfi_def_cfa_offset 32
 ; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV64-NEXT:    vfirst.m a0, v0
 ; RV64-NEXT:    seqz a0, a0
-; RV64-NEXT:    sb a0, 16(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v0
-; RV64-NEXT:    slli a1, a0, 49
+; RV64-NEXT:    slli a1, a0, 61
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 23(sp)
-; RV64-NEXT:    slli a1, a0, 51
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    slli a1, a0, 59
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 22(sp)
-; RV64-NEXT:    slli a1, a0, 53
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    slli a1, a0, 57
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 21(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-NEXT:    slli a1, a0, 55
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 20(sp)
-; RV64-NEXT:    slli a1, a0, 57
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    slli a1, a0, 53
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 19(sp)
-; RV64-NEXT:    slli a1, a0, 59
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    slli a1, a0, 51
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 18(sp)
-; RV64-NEXT:    slli a1, a0, 61
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    slli a1, a0, 49
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 17(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v0, 2
+; RV64-NEXT:    vslidedown.vi v9, v0, 2
 ; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vfirst.m a1, v8
+; RV64-NEXT:    vfirst.m a1, v9
 ; RV64-NEXT:    seqz a1, a1
-; RV64-NEXT:    sb a1, 24(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.x.s a1, v8
-; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    vmv.x.s a1, v9
+; RV64-NEXT:    slli a2, a1, 61
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 31(sp)
-; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 59
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 30(sp)
-; RV64-NEXT:    slli a2, a1, 53
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 57
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 29(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
 ; RV64-NEXT:    slli a2, a1, 55
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 28(sp)
-; RV64-NEXT:    slli a2, a1, 57
-; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 27(sp)
-; RV64-NEXT:    slli a2, a1, 59
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 53
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 26(sp)
-; RV64-NEXT:    slli a2, a1, 61
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 51
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 25(sp)
-; RV64-NEXT:    slli a2, a0, 48
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 49
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 7(sp)
-; RV64-NEXT:    slli a2, a0, 50
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vand.vi v8, v8, 1
+; RV64-NEXT:    vmsne.vi v0, v8, 0
+; RV64-NEXT:    slli a2, a0, 62
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 6(sp)
-; RV64-NEXT:    slli a2, a0, 52
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 60
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 5(sp)
-; RV64-NEXT:    slli a2, a0, 54
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 58
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 4(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
 ; RV64-NEXT:    slli a2, a0, 56
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 3(sp)
-; RV64-NEXT:    slli a2, a0, 58
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 54
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 2(sp)
-; RV64-NEXT:    slli a2, a0, 60
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 52
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 1(sp)
-; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 50
+; RV64-NEXT:    srli a2, a2, 63
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a0, a0, 48
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 0(sp)
-; RV64-NEXT:    slli a0, a1, 48
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    slli a0, a1, 62
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 15(sp)
-; RV64-NEXT:    slli a0, a1, 50
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 14(sp)
-; RV64-NEXT:    slli a0, a1, 52
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    slli a0, a1, 60
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 13(sp)
-; RV64-NEXT:    slli a0, a1, 54
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    slli a0, a1, 58
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 12(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-NEXT:    slli a0, a1, 56
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 11(sp)
-; RV64-NEXT:    slli a0, a1, 58
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    slli a0, a1, 54
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 10(sp)
-; RV64-NEXT:    slli a0, a1, 60
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    slli a0, a1, 52
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    slli a0, a1, 50
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 9(sp)
-; RV64-NEXT:    slli a1, a1, 62
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    slli a1, a1, 48
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 8(sp)
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    mv a0, sp
-; RV64-NEXT:    vle8.v v9, (a0)
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-NEXT:    vand.vi v8, v8, 1
-; RV64-NEXT:    vmsne.vi v0, v8, 0
-; RV64-NEXT:    vand.vi v8, v9, 1
 ; RV64-NEXT:    vmsne.vi v8, v8, 0
-; RV64-NEXT:    addi sp, sp, 32
 ; RV64-NEXT:    ret
 %retval = call {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1> %vec)
 ret {<16 x i1>, <16 x i1>} %retval

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index aa340952749c..ab2882fe95f9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -7,248 +7,228 @@
 define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
 ; RV32-LABEL: vector_interleave_v32i1_v16i1:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -64
-; RV32-NEXT:    .cfi_def_cfa_offset 64
-; RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 64
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -32
 ; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vfirst.m a0, v8
-; RV32-NEXT:    seqz a0, a0
-; RV32-NEXT:    sb a0, 1(sp)
 ; RV32-NEXT:    vfirst.m a0, v0
 ; RV32-NEXT:    seqz a0, a0
-; RV32-NEXT:    sb a0, 0(sp)
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v8, a0
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vfirst.m a0, v8
+; RV32-NEXT:    seqz a0, a0
+; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    slli a1, a0, 16
+; RV32-NEXT:    vmv.x.s a0, v0
+; RV32-NEXT:    slli a1, a0, 30
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 31(sp)
-; RV32-NEXT:    vmv.x.s a1, v0
-; RV32-NEXT:    slli a2, a1, 16
-; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 30(sp)
-; RV32-NEXT:    slli a2, a0, 17
+; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    slli a3, a1, 30
+; RV32-NEXT:    srli a3, a3, 31
+; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v8, v10, a3
+; RV32-NEXT:    slli a2, a0, 29
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 29(sp)
-; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 29
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 28(sp)
-; RV32-NEXT:    slli a2, a0, 18
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 28
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 27(sp)
-; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 28
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 26(sp)
-; RV32-NEXT:    slli a2, a0, 19
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 27
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 25(sp)
-; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 27
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 24(sp)
-; RV32-NEXT:    slli a2, a0, 20
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 26
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 23(sp)
-; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 26
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 22(sp)
-; RV32-NEXT:    slli a2, a0, 21
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 25
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 21(sp)
-; RV32-NEXT:    slli a2, a1, 21
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 25
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 20(sp)
-; RV32-NEXT:    slli a2, a0, 22
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 24
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 19(sp)
-; RV32-NEXT:    slli a2, a1, 22
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 24
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 18(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32-NEXT:    slli a2, a0, 23
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 17(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32-NEXT:    slli a2, a1, 23
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 16(sp)
-; RV32-NEXT:    slli a2, a0, 24
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 22
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 15(sp)
-; RV32-NEXT:    slli a2, a1, 24
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 22
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 14(sp)
-; RV32-NEXT:    slli a2, a0, 25
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 21
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 13(sp)
-; RV32-NEXT:    slli a2, a1, 25
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 21
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 12(sp)
-; RV32-NEXT:    slli a2, a0, 26
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 20
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 11(sp)
-; RV32-NEXT:    slli a2, a1, 26
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 20
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 10(sp)
-; RV32-NEXT:    slli a2, a0, 27
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 19
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 9(sp)
-; RV32-NEXT:    slli a2, a1, 27
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 19
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 8(sp)
-; RV32-NEXT:    slli a2, a0, 28
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 18
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 7(sp)
-; RV32-NEXT:    slli a2, a1, 28
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 18
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 6(sp)
-; RV32-NEXT:    slli a2, a0, 29
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a0, 17
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 5(sp)
-; RV32-NEXT:    slli a2, a1, 29
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a2, a1, 17
 ; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    sb a2, 4(sp)
-; RV32-NEXT:    slli a0, a0, 30
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    slli a0, a0, 16
 ; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    sb a0, 3(sp)
-; RV32-NEXT:    slli a1, a1, 30
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    slli a1, a1, 16
 ; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    sb a1, 2(sp)
-; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV32-NEXT:    vle8.v v8, (a1)
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-NEXT:    vand.vi v8, v8, 1
 ; RV32-NEXT:    vmsne.vi v0, v8, 0
-; RV32-NEXT:    addi sp, s0, -64
-; RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 64
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vector_interleave_v32i1_v16i1:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -64
-; RV64-NEXT:    .cfi_def_cfa_offset 64
-; RV64-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 64
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -32
 ; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vfirst.m a0, v8
-; RV64-NEXT:    seqz a0, a0
-; RV64-NEXT:    sb a0, 1(sp)
 ; RV64-NEXT:    vfirst.m a0, v0
 ; RV64-NEXT:    seqz a0, a0
-; RV64-NEXT:    sb a0, 0(sp)
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v8, a0
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vfirst.m a0, v8
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    slli a1, a0, 48
+; RV64-NEXT:    vmv.x.s a0, v0
+; RV64-NEXT:    slli a1, a0, 62
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 31(sp)
-; RV64-NEXT:    vmv.x.s a1, v0
-; RV64-NEXT:    slli a2, a1, 48
-; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 30(sp)
-; RV64-NEXT:    slli a2, a0, 49
+; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v8
+; RV64-NEXT:    slli a3, a1, 62
+; RV64-NEXT:    srli a3, a3, 63
+; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v8, v10, a3
+; RV64-NEXT:    slli a2, a0, 61
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 29(sp)
-; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 61
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 28(sp)
-; RV64-NEXT:    slli a2, a0, 50
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 60
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 27(sp)
-; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 60
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 26(sp)
-; RV64-NEXT:    slli a2, a0, 51
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 59
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 25(sp)
-; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 59
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 24(sp)
-; RV64-NEXT:    slli a2, a0, 52
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 58
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 23(sp)
-; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 58
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 22(sp)
-; RV64-NEXT:    slli a2, a0, 53
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 57
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 21(sp)
-; RV64-NEXT:    slli a2, a1, 53
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 57
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 20(sp)
-; RV64-NEXT:    slli a2, a0, 54
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 56
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 19(sp)
-; RV64-NEXT:    slli a2, a1, 54
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 56
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 18(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
 ; RV64-NEXT:    slli a2, a0, 55
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 17(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
 ; RV64-NEXT:    slli a2, a1, 55
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 16(sp)
-; RV64-NEXT:    slli a2, a0, 56
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 54
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 15(sp)
-; RV64-NEXT:    slli a2, a1, 56
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 54
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 14(sp)
-; RV64-NEXT:    slli a2, a0, 57
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 53
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 13(sp)
-; RV64-NEXT:    slli a2, a1, 57
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 53
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 12(sp)
-; RV64-NEXT:    slli a2, a0, 58
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 52
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 11(sp)
-; RV64-NEXT:    slli a2, a1, 58
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 52
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 10(sp)
-; RV64-NEXT:    slli a2, a0, 59
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 51
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 9(sp)
-; RV64-NEXT:    slli a2, a1, 59
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 51
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 8(sp)
-; RV64-NEXT:    slli a2, a0, 60
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 50
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 7(sp)
-; RV64-NEXT:    slli a2, a1, 60
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 50
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 6(sp)
-; RV64-NEXT:    slli a2, a0, 61
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a0, 49
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 5(sp)
-; RV64-NEXT:    slli a2, a1, 61
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a2, a1, 49
 ; RV64-NEXT:    srli a2, a2, 63
-; RV64-NEXT:    sb a2, 4(sp)
-; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    slli a0, a0, 48
 ; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    sb a0, 3(sp)
-; RV64-NEXT:    slli a1, a1, 62
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    slli a1, a1, 48
 ; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    sb a1, 2(sp)
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    mv a1, sp
-; RV64-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; RV64-NEXT:    vle8.v v8, (a1)
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-NEXT:    vand.vi v8, v8, 1
 ; RV64-NEXT:    vmsne.vi v0, v8, 0
-; RV64-NEXT:    addi sp, s0, -64
-; RV64-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 64
 ; RV64-NEXT:    ret
 	   %res = call <32 x i1> @llvm.experimental.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b)
 	   ret <32 x i1> %res

diff  --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index fc7f1f588369..34210dab3854 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -597,60 +597,69 @@ define void @test_srem_vec(ptr %X) nounwind {
 ;
 ; RV32MV-LABEL: test_srem_vec:
 ; RV32MV:       # %bb.0:
-; RV32MV-NEXT:    addi sp, sp, -64
-; RV32MV-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s2, 52(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s3, 48(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s4, 44(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s5, 40(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s6, 36(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    addi s0, sp, 64
-; RV32MV-NEXT:    andi sp, sp, -32
-; RV32MV-NEXT:    mv s2, a0
-; RV32MV-NEXT:    lw a0, 8(a0)
-; RV32MV-NEXT:    lw a1, 4(s2)
-; RV32MV-NEXT:    lbu a2, 12(s2)
-; RV32MV-NEXT:    slli a3, a0, 31
-; RV32MV-NEXT:    srli a4, a1, 1
-; RV32MV-NEXT:    or s3, a4, a3
-; RV32MV-NEXT:    slli a3, a2, 30
-; RV32MV-NEXT:    srli a4, a0, 2
-; RV32MV-NEXT:    or s4, a4, a3
-; RV32MV-NEXT:    srli a0, a0, 1
+; RV32MV-NEXT:    addi sp, sp, -48
+; RV32MV-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    csrr a1, vlenb
+; RV32MV-NEXT:    slli a1, a1, 1
+; RV32MV-NEXT:    sub sp, sp, a1
+; RV32MV-NEXT:    mv s0, a0
+; RV32MV-NEXT:    lbu a0, 12(a0)
+; RV32MV-NEXT:    lw a1, 8(s0)
+; RV32MV-NEXT:    slli a2, a0, 30
+; RV32MV-NEXT:    lw a3, 4(s0)
+; RV32MV-NEXT:    srli s1, a1, 2
+; RV32MV-NEXT:    or s1, s1, a2
+; RV32MV-NEXT:    slli a2, a1, 31
+; RV32MV-NEXT:    srli a4, a3, 1
+; RV32MV-NEXT:    or s2, a4, a2
+; RV32MV-NEXT:    srli a0, a0, 2
 ; RV32MV-NEXT:    slli a0, a0, 31
-; RV32MV-NEXT:    srai s5, a0, 31
-; RV32MV-NEXT:    srli a2, a2, 2
-; RV32MV-NEXT:    slli a2, a2, 31
-; RV32MV-NEXT:    lw a0, 0(s2)
-; RV32MV-NEXT:    srai s6, a2, 31
+; RV32MV-NEXT:    srai s3, a0, 31
+; RV32MV-NEXT:    srli a1, a1, 1
 ; RV32MV-NEXT:    slli a1, a1, 31
+; RV32MV-NEXT:    lw a0, 0(s0)
+; RV32MV-NEXT:    srai s4, a1, 31
+; RV32MV-NEXT:    slli a1, a3, 31
 ; RV32MV-NEXT:    srai a1, a1, 31
 ; RV32MV-NEXT:    li a2, 6
 ; RV32MV-NEXT:    li a3, 0
 ; RV32MV-NEXT:    call __moddi3@plt
-; RV32MV-NEXT:    sw a1, 4(sp)
-; RV32MV-NEXT:    sw a0, 0(sp)
-; RV32MV-NEXT:    li a2, -5
-; RV32MV-NEXT:    li a3, -1
-; RV32MV-NEXT:    mv a0, s4
-; RV32MV-NEXT:    mv a1, s6
-; RV32MV-NEXT:    call __moddi3@plt
-; RV32MV-NEXT:    sw a1, 20(sp)
-; RV32MV-NEXT:    sw a0, 16(sp)
+; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a0
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
+; RV32MV-NEXT:    addi a0, sp, 16
+; RV32MV-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32MV-NEXT:    li a2, 7
-; RV32MV-NEXT:    mv a0, s3
-; RV32MV-NEXT:    mv a1, s5
+; RV32MV-NEXT:    mv a0, s2
+; RV32MV-NEXT:    mv a1, s4
 ; RV32MV-NEXT:    li a3, 0
 ; RV32MV-NEXT:    call __moddi3@plt
-; RV32MV-NEXT:    sw a1, 12(sp)
-; RV32MV-NEXT:    sw a0, 8(sp)
+; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32MV-NEXT:    addi a2, sp, 16
+; RV32MV-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a0
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
+; RV32MV-NEXT:    addi a0, sp, 16
+; RV32MV-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32MV-NEXT:    li a2, -5
+; RV32MV-NEXT:    li a3, -1
+; RV32MV-NEXT:    mv a0, s1
+; RV32MV-NEXT:    mv a1, s3
+; RV32MV-NEXT:    call __moddi3@plt
+; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32MV-NEXT:    addi a2, sp, 16
+; RV32MV-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a0
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a0
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32MV-NEXT:    li a0, 85
-; RV32MV-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; RV32MV-NEXT:    vmv.s.x v0, a0
-; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT:    mv a0, sp
-; RV32MV-NEXT:    vle32.v v8, (a0)
 ; RV32MV-NEXT:    vmv.v.i v10, 1
 ; RV32MV-NEXT:    vmerge.vim v10, v10, -1, v0
 ; RV32MV-NEXT:    vand.vv v8, v8, v10
@@ -668,14 +677,14 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT:    vmv.v.i v8, 0
 ; RV32MV-NEXT:    vmerge.vim v8, v8, -1, v0
 ; RV32MV-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
-; RV32MV-NEXT:    vse32.v v8, (s2)
+; RV32MV-NEXT:    vse32.v v8, (s0)
 ; RV32MV-NEXT:    vslidedown.vi v10, v8, 1
 ; RV32MV-NEXT:    vmv.x.s a0, v10
 ; RV32MV-NEXT:    vslidedown.vi v10, v8, 2
 ; RV32MV-NEXT:    vmv.x.s a1, v10
 ; RV32MV-NEXT:    slli a2, a1, 1
 ; RV32MV-NEXT:    sub a2, a2, a0
-; RV32MV-NEXT:    sw a2, 4(s2)
+; RV32MV-NEXT:    sw a2, 4(s0)
 ; RV32MV-NEXT:    vslidedown.vi v10, v8, 4
 ; RV32MV-NEXT:    vmv.x.s a0, v10
 ; RV32MV-NEXT:    srli a2, a0, 30
@@ -684,7 +693,7 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT:    slli a3, a3, 2
 ; RV32MV-NEXT:    or a2, a3, a2
 ; RV32MV-NEXT:    andi a2, a2, 7
-; RV32MV-NEXT:    sb a2, 12(s2)
+; RV32MV-NEXT:    sb a2, 12(s0)
 ; RV32MV-NEXT:    srli a1, a1, 31
 ; RV32MV-NEXT:    vslidedown.vi v8, v8, 3
 ; RV32MV-NEXT:    vmv.x.s a2, v8
@@ -693,70 +702,65 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT:    slli a0, a0, 2
 ; RV32MV-NEXT:    or a0, a1, a0
 ; RV32MV-NEXT:    or a0, a0, a2
-; RV32MV-NEXT:    sw a0, 8(s2)
-; RV32MV-NEXT:    addi sp, s0, -64
-; RV32MV-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s2, 52(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s3, 48(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s4, 44(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s5, 40(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s6, 36(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    addi sp, sp, 64
+; RV32MV-NEXT:    sw a0, 8(s0)
+; RV32MV-NEXT:    csrr a0, vlenb
+; RV32MV-NEXT:    slli a0, a0, 1
+; RV32MV-NEXT:    add sp, sp, a0
+; RV32MV-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    addi sp, sp, 48
 ; RV32MV-NEXT:    ret
 ;
 ; RV64MV-LABEL: test_srem_vec:
 ; RV64MV:       # %bb.0:
-; RV64MV-NEXT:    addi sp, sp, -64
-; RV64MV-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; RV64MV-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; RV64MV-NEXT:    addi s0, sp, 64
-; RV64MV-NEXT:    andi sp, sp, -32
-; RV64MV-NEXT:    lbu a1, 12(a0)
+; RV64MV-NEXT:    ld a1, 0(a0)
 ; RV64MV-NEXT:    lwu a2, 8(a0)
-; RV64MV-NEXT:    slli a1, a1, 32
-; RV64MV-NEXT:    ld a3, 0(a0)
-; RV64MV-NEXT:    or a1, a2, a1
-; RV64MV-NEXT:    slli a1, a1, 29
-; RV64MV-NEXT:    srai a1, a1, 31
-; RV64MV-NEXT:    srli a4, a3, 2
-; RV64MV-NEXT:    slli a2, a2, 62
-; RV64MV-NEXT:    lui a5, %hi(.LCPI3_0)
-; RV64MV-NEXT:    ld a5, %lo(.LCPI3_0)(a5)
-; RV64MV-NEXT:    or a2, a2, a4
-; RV64MV-NEXT:    slli a3, a3, 31
+; RV64MV-NEXT:    srli a3, a1, 2
+; RV64MV-NEXT:    lbu a4, 12(a0)
+; RV64MV-NEXT:    slli a5, a2, 62
+; RV64MV-NEXT:    or a3, a5, a3
 ; RV64MV-NEXT:    srai a3, a3, 31
-; RV64MV-NEXT:    mulh a4, a3, a5
+; RV64MV-NEXT:    slli a4, a4, 32
+; RV64MV-NEXT:    or a2, a2, a4
+; RV64MV-NEXT:    slli a2, a2, 29
+; RV64MV-NEXT:    lui a4, %hi(.LCPI3_0)
+; RV64MV-NEXT:    ld a4, %lo(.LCPI3_0)(a4)
+; RV64MV-NEXT:    srai a2, a2, 31
+; RV64MV-NEXT:    slli a1, a1, 31
+; RV64MV-NEXT:    srai a1, a1, 31
+; RV64MV-NEXT:    mulh a4, a2, a4
 ; RV64MV-NEXT:    srli a5, a4, 63
+; RV64MV-NEXT:    srai a4, a4, 1
 ; RV64MV-NEXT:    add a4, a4, a5
-; RV64MV-NEXT:    li a5, 6
-; RV64MV-NEXT:    mul a4, a4, a5
 ; RV64MV-NEXT:    lui a5, %hi(.LCPI3_1)
 ; RV64MV-NEXT:    ld a5, %lo(.LCPI3_1)(a5)
-; RV64MV-NEXT:    srai a2, a2, 31
-; RV64MV-NEXT:    sub a3, a3, a4
-; RV64MV-NEXT:    sd a3, 0(sp)
-; RV64MV-NEXT:    mulh a3, a2, a5
-; RV64MV-NEXT:    srli a4, a3, 63
-; RV64MV-NEXT:    srai a3, a3, 1
-; RV64MV-NEXT:    add a3, a3, a4
-; RV64MV-NEXT:    slli a4, a3, 3
+; RV64MV-NEXT:    add a2, a2, a4
+; RV64MV-NEXT:    slli a4, a4, 2
+; RV64MV-NEXT:    add a2, a2, a4
+; RV64MV-NEXT:    mulh a4, a3, a5
+; RV64MV-NEXT:    srli a5, a4, 63
+; RV64MV-NEXT:    srai a4, a4, 1
+; RV64MV-NEXT:    add a4, a4, a5
 ; RV64MV-NEXT:    lui a5, %hi(.LCPI3_2)
 ; RV64MV-NEXT:    ld a5, %lo(.LCPI3_2)(a5)
-; RV64MV-NEXT:    add a2, a2, a3
-; RV64MV-NEXT:    sub a2, a2, a4
-; RV64MV-NEXT:    sd a2, 8(sp)
-; RV64MV-NEXT:    mulh a2, a1, a5
-; RV64MV-NEXT:    srli a3, a2, 63
-; RV64MV-NEXT:    srai a2, a2, 1
-; RV64MV-NEXT:    add a2, a2, a3
-; RV64MV-NEXT:    slli a3, a2, 2
-; RV64MV-NEXT:    add a1, a1, a2
-; RV64MV-NEXT:    add a1, a1, a3
-; RV64MV-NEXT:    sd a1, 16(sp)
-; RV64MV-NEXT:    mv a1, sp
+; RV64MV-NEXT:    add a3, a3, a4
+; RV64MV-NEXT:    slli a4, a4, 3
+; RV64MV-NEXT:    sub a3, a3, a4
+; RV64MV-NEXT:    mulh a4, a1, a5
+; RV64MV-NEXT:    srli a5, a4, 63
+; RV64MV-NEXT:    add a4, a4, a5
+; RV64MV-NEXT:    li a5, 6
+; RV64MV-NEXT:    mul a4, a4, a5
+; RV64MV-NEXT:    sub a1, a1, a4
 ; RV64MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64MV-NEXT:    vle64.v v8, (a1)
+; RV64MV-NEXT:    vslide1down.vx v8, v8, a1
+; RV64MV-NEXT:    vslide1down.vx v8, v8, a3
+; RV64MV-NEXT:    vslide1down.vx v8, v8, a2
+; RV64MV-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64MV-NEXT:    lui a1, %hi(.LCPI3_3)
 ; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI3_3)
 ; RV64MV-NEXT:    vle64.v v10, (a1)
@@ -784,10 +788,6 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV64MV-NEXT:    srli a3, a3, 62
 ; RV64MV-NEXT:    or a2, a3, a2
 ; RV64MV-NEXT:    sw a2, 8(a0)
-; RV64MV-NEXT:    addi sp, s0, -64
-; RV64MV-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; RV64MV-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64MV-NEXT:    addi sp, sp, 64
 ; RV64MV-NEXT:    ret
   %ld = load <3 x i33>, ptr %X
   %srem = srem <3 x i33> %ld, <i33 6, i33 7, i33 -5>

diff  --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 4c0a776b5ee8..adce58ccc801 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -521,43 +521,41 @@ define void @test_urem_vec(ptr %X) nounwind {
 ;
 ; RV32MV-LABEL: test_urem_vec:
 ; RV32MV:       # %bb.0:
-; RV32MV-NEXT:    addi sp, sp, -16
 ; RV32MV-NEXT:    lw a1, 0(a0)
 ; RV32MV-NEXT:    andi a2, a1, 2047
-; RV32MV-NEXT:    sh a2, 8(sp)
-; RV32MV-NEXT:    slli a2, a1, 10
-; RV32MV-NEXT:    srli a2, a2, 21
-; RV32MV-NEXT:    sh a2, 10(sp)
+; RV32MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32MV-NEXT:    lbu a2, 4(a0)
+; RV32MV-NEXT:    slli a3, a1, 10
+; RV32MV-NEXT:    srli a3, a3, 21
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a3
 ; RV32MV-NEXT:    slli a2, a2, 10
 ; RV32MV-NEXT:    srli a1, a1, 22
 ; RV32MV-NEXT:    or a1, a1, a2
 ; RV32MV-NEXT:    andi a1, a1, 2047
-; RV32MV-NEXT:    sh a1, 12(sp)
-; RV32MV-NEXT:    addi a1, sp, 8
-; RV32MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32MV-NEXT:    vle16.v v8, (a1)
-; RV32MV-NEXT:    vmv.v.i v9, 10
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
+; RV32MV-NEXT:    lui a1, %hi(.LCPI4_0)
+; RV32MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
+; RV32MV-NEXT:    vle16.v v9, (a1)
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a0
+; RV32MV-NEXT:    vid.v v10
+; RV32MV-NEXT:    vsub.vv v8, v8, v10
+; RV32MV-NEXT:    vmul.vv v8, v8, v9
+; RV32MV-NEXT:    vadd.vv v9, v8, v8
+; RV32MV-NEXT:    vmv.v.i v10, 10
 ; RV32MV-NEXT:    li a1, 9
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
-; RV32MV-NEXT:    vmv.s.x v9, a1
+; RV32MV-NEXT:    vmv.s.x v10, a1
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32MV-NEXT:    lui a1, %hi(.LCPI4_0)
-; RV32MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
-; RV32MV-NEXT:    vle16.v v10, (a1)
-; RV32MV-NEXT:    vid.v v11
-; RV32MV-NEXT:    vsub.vv v8, v8, v11
-; RV32MV-NEXT:    vmul.vv v8, v8, v10
-; RV32MV-NEXT:    vadd.vv v10, v8, v8
-; RV32MV-NEXT:    vsll.vv v9, v10, v9
+; RV32MV-NEXT:    vsll.vv v9, v9, v10
+; RV32MV-NEXT:    li a1, 2047
+; RV32MV-NEXT:    vand.vx v8, v8, a1
 ; RV32MV-NEXT:    vmv.v.i v10, 0
-; RV32MV-NEXT:    li a1, 1
+; RV32MV-NEXT:    li a2, 1
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
 ; RV32MV-NEXT:    vmv1r.v v11, v10
-; RV32MV-NEXT:    vmv.s.x v11, a1
-; RV32MV-NEXT:    li a1, 2047
+; RV32MV-NEXT:    vmv.s.x v11, a2
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32MV-NEXT:    vand.vx v8, v8, a1
 ; RV32MV-NEXT:    lui a2, %hi(.LCPI4_1)
 ; RV32MV-NEXT:    addi a2, a2, %lo(.LCPI4_1)
 ; RV32MV-NEXT:    vle16.v v12, (a2)
@@ -581,47 +579,44 @@ define void @test_urem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT:    or a1, a2, a1
 ; RV32MV-NEXT:    or a1, a1, a3
 ; RV32MV-NEXT:    sw a1, 0(a0)
-; RV32MV-NEXT:    addi sp, sp, 16
 ; RV32MV-NEXT:    ret
 ;
 ; RV64MV-LABEL: test_urem_vec:
 ; RV64MV:       # %bb.0:
-; RV64MV-NEXT:    addi sp, sp, -16
 ; RV64MV-NEXT:    lbu a1, 4(a0)
 ; RV64MV-NEXT:    lwu a2, 0(a0)
 ; RV64MV-NEXT:    slli a1, a1, 32
 ; RV64MV-NEXT:    or a1, a2, a1
-; RV64MV-NEXT:    srli a2, a1, 22
-; RV64MV-NEXT:    sh a2, 12(sp)
 ; RV64MV-NEXT:    andi a2, a1, 2047
-; RV64MV-NEXT:    sh a2, 8(sp)
-; RV64MV-NEXT:    slli a1, a1, 42
-; RV64MV-NEXT:    srli a1, a1, 53
-; RV64MV-NEXT:    sh a1, 10(sp)
-; RV64MV-NEXT:    addi a1, sp, 8
 ; RV64MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64MV-NEXT:    vle16.v v8, (a1)
-; RV64MV-NEXT:    vmv.v.i v9, 10
+; RV64MV-NEXT:    vslide1down.vx v8, v8, a2
+; RV64MV-NEXT:    slli a2, a1, 42
+; RV64MV-NEXT:    srli a2, a2, 53
+; RV64MV-NEXT:    vslide1down.vx v8, v8, a2
+; RV64MV-NEXT:    srli a1, a1, 22
+; RV64MV-NEXT:    vslide1down.vx v8, v8, a1
+; RV64MV-NEXT:    lui a1, %hi(.LCPI4_0)
+; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
+; RV64MV-NEXT:    vle16.v v9, (a1)
+; RV64MV-NEXT:    vslide1down.vx v8, v8, a0
+; RV64MV-NEXT:    vid.v v10
+; RV64MV-NEXT:    vsub.vv v8, v8, v10
+; RV64MV-NEXT:    vmul.vv v8, v8, v9
+; RV64MV-NEXT:    vadd.vv v9, v8, v8
+; RV64MV-NEXT:    vmv.v.i v10, 10
 ; RV64MV-NEXT:    li a1, 9
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
-; RV64MV-NEXT:    vmv.s.x v9, a1
+; RV64MV-NEXT:    vmv.s.x v10, a1
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64MV-NEXT:    lui a1, %hi(.LCPI4_0)
-; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
-; RV64MV-NEXT:    vle16.v v10, (a1)
-; RV64MV-NEXT:    vid.v v11
-; RV64MV-NEXT:    vsub.vv v8, v8, v11
-; RV64MV-NEXT:    vmul.vv v8, v8, v10
-; RV64MV-NEXT:    vadd.vv v10, v8, v8
-; RV64MV-NEXT:    vsll.vv v9, v10, v9
+; RV64MV-NEXT:    vsll.vv v9, v9, v10
+; RV64MV-NEXT:    li a1, 2047
+; RV64MV-NEXT:    vand.vx v8, v8, a1
 ; RV64MV-NEXT:    vmv.v.i v10, 0
-; RV64MV-NEXT:    li a1, 1
+; RV64MV-NEXT:    li a2, 1
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
 ; RV64MV-NEXT:    vmv1r.v v11, v10
-; RV64MV-NEXT:    vmv.s.x v11, a1
-; RV64MV-NEXT:    li a1, 2047
+; RV64MV-NEXT:    vmv.s.x v11, a2
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64MV-NEXT:    vand.vx v8, v8, a1
 ; RV64MV-NEXT:    lui a2, %hi(.LCPI4_1)
 ; RV64MV-NEXT:    addi a2, a2, %lo(.LCPI4_1)
 ; RV64MV-NEXT:    vle16.v v12, (a2)
@@ -645,7 +640,6 @@ define void @test_urem_vec(ptr %X) nounwind {
 ; RV64MV-NEXT:    slli a1, a1, 31
 ; RV64MV-NEXT:    srli a1, a1, 63
 ; RV64MV-NEXT:    sb a1, 4(a0)
-; RV64MV-NEXT:    addi sp, sp, 16
 ; RV64MV-NEXT:    ret
   %ld = load <3 x i11>, ptr %X
   %urem = urem <3 x i11> %ld, <i11 6, i11 7, i11 -5>
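
As an illustrative sketch (not part of the committed tests; the function name, target
flags, and exact register allocation below are assumptions chosen only for the example):
with this change, a non-constant integer build_vector such as the one below is expected
to lower to a chain of vslide1down.vx instructions instead of a series of stack stores
followed by a vector load.

; Assumed command line: llc -mtriple=riscv64 -mattr=+v < example.ll
define <4 x i32> @buildvec_from_scalars(i32 %a, i32 %b, i32 %c, i32 %d) {
  %v0 = insertelement <4 x i32> poison, i32 %a, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %c, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %d, i32 3
  ret <4 x i32> %v3
}
; Roughly expected output with the new lowering (the previous lowering would have
; spilled a0-a3 to the stack and reloaded the vector with vle32.v):
;   vsetivli zero, 4, e32, m1, ta, ma
;   vslide1down.vx v8, v8, a0
;   vslide1down.vx v8, v8, a1
;   vslide1down.vx v8, v8, a2
;   vslide1down.vx v8, v8, a3
;   ret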


        

