[llvm] 35d218e - [RISCV] Use vslide1down idiom for generic build_vector
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon May 1 19:08:14 PDT 2023
Author: Philip Reames
Date: 2023-05-01T19:04:35-07:00
New Revision: 35d218e92740fb49ad5e2be4c700aa38c1133809
URL: https://github.com/llvm/llvm-project/commit/35d218e92740fb49ad5e2be4c700aa38c1133809
DIFF: https://github.com/llvm/llvm-project/commit/35d218e92740fb49ad5e2be4c700aa38c1133809.diff
LOG: [RISCV] Use vslide1down idiom for generic build_vector
We had previously been lowering these by storing the elements to the stack and reloading the result as a vector.
A couple of small notes:
We have the vslide1down idiom in a few other places. As a follow-up patch, I plan to try to common up the code a bit.
The VF=2 case is still going through the splat + insert path. Picking the optimal sequence for it seems to be a bit fiddly (due to constant materialization costs), so I restricted this change to cases which would previously have hit the stack.
I'm only handling integer vectors for the moment, mostly because the vfslide1down ISD nodes don't appear to be in place yet. This will be an obvious follow-up.
One of the test diffs does expose a missing combine - a build_vector whose prefix comes from a vector extract sequence. The resulting code is arguably worse (due to domain crossing vs a stack store), but I think this is a narrow enough case to be non-blocking for now. Let me know if you disagree.
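For context on the idiom itself: vslide1down.vx shifts every element of the source vector down one position and writes the new scalar into the highest-numbered element, so chaining one slide per build_vector operand fills the result in order, with the first operand ending up at element 0. A minimal standalone sketch of that semantics (plain C++ scalar simulation, not the LLVM lowering; the 4-element i32 example and names are illustrative):

// Minimal model of the vslide1down build_vector idiom (illustrative only;
// this simulates the instruction's semantics, not the LLVM lowering code).
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// vslide1down.vx semantics: shift every element down one position and
// insert the scalar into the highest-numbered element.
template <std::size_t N>
std::array<int32_t, N> vslide1down(std::array<int32_t, N> Vec, int32_t Scalar) {
  for (std::size_t I = 0; I + 1 < N; ++I)
    Vec[I] = Vec[I + 1];
  Vec[N - 1] = Scalar;
  return Vec;
}

int main() {
  // build_vector <10, 20, 30, 40>: start from a don't-care vector and slide
  // each operand in, first operand first, so it ends up in element 0.
  std::array<int32_t, 4> Vec{}; // stands in for the initial undef value
  const std::array<int32_t, 4> Operands{10, 20, 30, 40};
  for (int32_t Scalar : Operands)
    Vec = vslide1down(Vec, Scalar);
  for (int32_t E : Vec)
    std::printf("%d ", E); // prints: 10 20 30 40
  std::printf("\n");
  return 0;
}

The test diffs below show the same pattern at the assembly level: one vslide1down.vx per element, replacing the previous stack store/reload sequence.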
Differential Revision: https://reviews.llvm.org/D149263
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ff081a7eead9..d8ebe5ea9355 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3186,7 +3186,24 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return Vec;
}
- return SDValue();
+ // For constant vectors, use generic constant pool lowering. Otherwise,
+ // we'd have to materialize constants in GPRs just to move them into the
+ // vector.
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()))
+ return SDValue();
+
+ // We can use a series of vslide1down instructions to move values in GPRs
+ // into the appropriate place in the result vector. We use slide1down
+ // to avoid the register group overlap constraint of vslide1up.
+ if (VT.isFloatingPoint())
+ // TODO: Use vfslide1down.
+ return SDValue();
+
+ SDValue Vec = DAG.getUNDEF(ContainerVT);
+ for (const SDValue &V : Op->ops())
+ Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL);
+ return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
index 6ea49d5a1314..212b2b840ce6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
@@ -34,29 +34,23 @@ define <2 x i1> @reverse_v2i1(<2 x i1> %a) {
define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
; CHECK-LABEL: reverse_v4i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: addi a0, sp, 14
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: vslidedown.vi v9, v8, 2
-; CHECK-NEXT: addi a0, sp, 13
-; CHECK-NEXT: vse8.v v9, (a0)
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: addi a0, sp, 12
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vfirst.m a1, v0
-; CHECK-NEXT: seqz a1, a1
-; CHECK-NEXT: sb a1, 15(sp)
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vslidedown.vi v9, v8, 3
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vslide1down.vx v9, v8, a0
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
+; CHECK-NEXT: vmv.x.s a0, v10
+; CHECK-NEXT: vslide1down.vx v9, v9, a0
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vslide1down.vx v8, v9, a0
+; CHECK-NEXT: vfirst.m a0, v0
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call <4 x i1> @llvm.experimental.vector.reverse.v4i1(<4 x i1> %a)
ret <4 x i1> %res
@@ -65,224 +59,200 @@ define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; RV32-BITS-UNKNOWN-LABEL: reverse_v8i1:
; RV32-BITS-UNKNOWN: # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16
-; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16
-; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
-; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0
-; RV32-BITS-UNKNOWN-NEXT: sb a0, 15(sp)
+; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29
+; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 12(sp)
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 24
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 30
; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a0, 8(sp)
-; RV32-BITS-UNKNOWN-NEXT: addi a0, sp, 8
-; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a0)
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 16
; RV32-BITS-UNKNOWN-NEXT: ret
;
; RV32-BITS-256-LABEL: reverse_v8i1:
; RV32-BITS-256: # %bb.0:
-; RV32-BITS-256-NEXT: addi sp, sp, -16
-; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16
-; RV32-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-BITS-256-NEXT: vfirst.m a0, v0
-; RV32-BITS-256-NEXT: seqz a0, a0
-; RV32-BITS-256-NEXT: sb a0, 15(sp)
+; RV32-BITS-256-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
; RV32-BITS-256-NEXT: vmv.x.s a0, v0
-; RV32-BITS-256-NEXT: slli a1, a0, 30
+; RV32-BITS-256-NEXT: slli a1, a0, 24
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 14(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 29
+; RV32-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 25
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 13(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 28
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 26
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 12(sp)
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
; RV32-BITS-256-NEXT: slli a1, a0, 27
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 11(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 26
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 28
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 10(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 25
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 29
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 9(sp)
-; RV32-BITS-256-NEXT: slli a0, a0, 24
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a0, a0, 30
; RV32-BITS-256-NEXT: srli a0, a0, 31
-; RV32-BITS-256-NEXT: sb a0, 8(sp)
-; RV32-BITS-256-NEXT: addi a0, sp, 8
-; RV32-BITS-256-NEXT: vle8.v v8, (a0)
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: vfirst.m a0, v0
+; RV32-BITS-256-NEXT: seqz a0, a0
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-256-NEXT: vand.vi v8, v8, 1
; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-256-NEXT: addi sp, sp, 16
; RV32-BITS-256-NEXT: ret
;
; RV32-BITS-512-LABEL: reverse_v8i1:
; RV32-BITS-512: # %bb.0:
-; RV32-BITS-512-NEXT: addi sp, sp, -16
-; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16
-; RV32-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-BITS-512-NEXT: vfirst.m a0, v0
-; RV32-BITS-512-NEXT: seqz a0, a0
-; RV32-BITS-512-NEXT: sb a0, 15(sp)
+; RV32-BITS-512-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
; RV32-BITS-512-NEXT: vmv.x.s a0, v0
-; RV32-BITS-512-NEXT: slli a1, a0, 30
+; RV32-BITS-512-NEXT: slli a1, a0, 24
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 14(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 29
+; RV32-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 25
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 13(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 28
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 26
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 12(sp)
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
; RV32-BITS-512-NEXT: slli a1, a0, 27
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 11(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 26
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 28
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 10(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 25
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 29
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 9(sp)
-; RV32-BITS-512-NEXT: slli a0, a0, 24
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a0, a0, 30
; RV32-BITS-512-NEXT: srli a0, a0, 31
-; RV32-BITS-512-NEXT: sb a0, 8(sp)
-; RV32-BITS-512-NEXT: addi a0, sp, 8
-; RV32-BITS-512-NEXT: vle8.v v8, (a0)
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: vfirst.m a0, v0
+; RV32-BITS-512-NEXT: seqz a0, a0
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-512-NEXT: vand.vi v8, v8, 1
; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-512-NEXT: addi sp, sp, 16
; RV32-BITS-512-NEXT: ret
;
; RV64-BITS-UNKNOWN-LABEL: reverse_v8i1:
; RV64-BITS-UNKNOWN: # %bb.0:
-; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -16
-; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16
-; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
-; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0
-; RV64-BITS-UNKNOWN-NEXT: sb a0, 15(sp)
+; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61
+; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 12(sp)
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 56
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 62
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a0, 8(sp)
-; RV64-BITS-UNKNOWN-NEXT: addi a0, sp, 8
-; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a0)
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 16
; RV64-BITS-UNKNOWN-NEXT: ret
;
; RV64-BITS-256-LABEL: reverse_v8i1:
; RV64-BITS-256: # %bb.0:
-; RV64-BITS-256-NEXT: addi sp, sp, -16
-; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16
-; RV64-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-BITS-256-NEXT: vfirst.m a0, v0
-; RV64-BITS-256-NEXT: seqz a0, a0
-; RV64-BITS-256-NEXT: sb a0, 15(sp)
+; RV64-BITS-256-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
; RV64-BITS-256-NEXT: vmv.x.s a0, v0
-; RV64-BITS-256-NEXT: slli a1, a0, 62
+; RV64-BITS-256-NEXT: slli a1, a0, 56
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 14(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 61
+; RV64-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 57
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 13(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 60
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 58
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 12(sp)
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
; RV64-BITS-256-NEXT: slli a1, a0, 59
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 11(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 58
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 60
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 10(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 57
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 61
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 9(sp)
-; RV64-BITS-256-NEXT: slli a0, a0, 56
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a0, a0, 62
; RV64-BITS-256-NEXT: srli a0, a0, 63
-; RV64-BITS-256-NEXT: sb a0, 8(sp)
-; RV64-BITS-256-NEXT: addi a0, sp, 8
-; RV64-BITS-256-NEXT: vle8.v v8, (a0)
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-256-NEXT: vfirst.m a0, v0
+; RV64-BITS-256-NEXT: seqz a0, a0
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-256-NEXT: vand.vi v8, v8, 1
; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-256-NEXT: addi sp, sp, 16
; RV64-BITS-256-NEXT: ret
;
; RV64-BITS-512-LABEL: reverse_v8i1:
; RV64-BITS-512: # %bb.0:
-; RV64-BITS-512-NEXT: addi sp, sp, -16
-; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16
-; RV64-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-BITS-512-NEXT: vfirst.m a0, v0
-; RV64-BITS-512-NEXT: seqz a0, a0
-; RV64-BITS-512-NEXT: sb a0, 15(sp)
+; RV64-BITS-512-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
; RV64-BITS-512-NEXT: vmv.x.s a0, v0
-; RV64-BITS-512-NEXT: slli a1, a0, 62
+; RV64-BITS-512-NEXT: slli a1, a0, 56
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 14(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 61
+; RV64-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 57
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 13(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 60
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 58
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 12(sp)
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
; RV64-BITS-512-NEXT: slli a1, a0, 59
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 11(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 58
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 60
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 10(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 57
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 61
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 9(sp)
-; RV64-BITS-512-NEXT: slli a0, a0, 56
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a0, a0, 62
; RV64-BITS-512-NEXT: srli a0, a0, 63
-; RV64-BITS-512-NEXT: sb a0, 8(sp)
-; RV64-BITS-512-NEXT: addi a0, sp, 8
-; RV64-BITS-512-NEXT: vle8.v v8, (a0)
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-512-NEXT: vfirst.m a0, v0
+; RV64-BITS-512-NEXT: seqz a0, a0
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-512-NEXT: vand.vi v8, v8, 1
; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-512-NEXT: addi sp, sp, 16
; RV64-BITS-512-NEXT: ret
%res = call <8 x i1> @llvm.experimental.vector.reverse.v8i1(<8 x i1> %a)
ret <8 x i1> %res
@@ -291,380 +261,344 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
; RV32-BITS-UNKNOWN-LABEL: reverse_v16i1:
; RV32-BITS-UNKNOWN: # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16
-; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16
-; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
-; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0
-; RV32-BITS-UNKNOWN-NEXT: sb a0, 15(sp)
; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 16
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29
+; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 12(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 8(sp)
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 7(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 6(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 5(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 4(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 3(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 2(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 1(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 16
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 30
; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a0, 0(sp)
-; RV32-BITS-UNKNOWN-NEXT: mv a0, sp
-; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a0)
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 16
; RV32-BITS-UNKNOWN-NEXT: ret
;
; RV32-BITS-256-LABEL: reverse_v16i1:
; RV32-BITS-256: # %bb.0:
-; RV32-BITS-256-NEXT: addi sp, sp, -16
-; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16
-; RV32-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-256-NEXT: vfirst.m a0, v0
-; RV32-BITS-256-NEXT: seqz a0, a0
-; RV32-BITS-256-NEXT: sb a0, 15(sp)
; RV32-BITS-256-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV32-BITS-256-NEXT: vmv.x.s a0, v0
-; RV32-BITS-256-NEXT: slli a1, a0, 30
+; RV32-BITS-256-NEXT: slli a1, a0, 16
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 14(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 29
+; RV32-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 17
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 13(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 28
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 18
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 12(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 27
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 19
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 11(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 26
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 20
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 10(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 25
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 21
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 9(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 24
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 22
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 8(sp)
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
; RV32-BITS-256-NEXT: slli a1, a0, 23
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 7(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 22
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 24
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 6(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 21
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 25
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 5(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 20
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 26
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 4(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 19
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 27
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 3(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 18
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 28
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 2(sp)
-; RV32-BITS-256-NEXT: slli a1, a0, 17
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 29
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 1(sp)
-; RV32-BITS-256-NEXT: slli a0, a0, 16
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a0, a0, 30
; RV32-BITS-256-NEXT: srli a0, a0, 31
-; RV32-BITS-256-NEXT: sb a0, 0(sp)
-; RV32-BITS-256-NEXT: mv a0, sp
-; RV32-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-256-NEXT: vle8.v v8, (a0)
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: vfirst.m a0, v0
+; RV32-BITS-256-NEXT: seqz a0, a0
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-256-NEXT: vand.vi v8, v8, 1
; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-256-NEXT: addi sp, sp, 16
; RV32-BITS-256-NEXT: ret
;
; RV32-BITS-512-LABEL: reverse_v16i1:
; RV32-BITS-512: # %bb.0:
-; RV32-BITS-512-NEXT: addi sp, sp, -16
-; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16
-; RV32-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-512-NEXT: vfirst.m a0, v0
-; RV32-BITS-512-NEXT: seqz a0, a0
-; RV32-BITS-512-NEXT: sb a0, 15(sp)
; RV32-BITS-512-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV32-BITS-512-NEXT: vmv.x.s a0, v0
-; RV32-BITS-512-NEXT: slli a1, a0, 30
+; RV32-BITS-512-NEXT: slli a1, a0, 16
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 14(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 29
+; RV32-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 17
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 13(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 28
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 18
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 12(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 27
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 19
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 11(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 26
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 20
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 10(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 25
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 21
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 9(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 24
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 22
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 8(sp)
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
; RV32-BITS-512-NEXT: slli a1, a0, 23
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 7(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 22
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 24
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 6(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 21
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 25
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 5(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 20
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 26
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 4(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 19
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 27
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 3(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 18
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 28
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 2(sp)
-; RV32-BITS-512-NEXT: slli a1, a0, 17
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 29
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 1(sp)
-; RV32-BITS-512-NEXT: slli a0, a0, 16
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a0, a0, 30
; RV32-BITS-512-NEXT: srli a0, a0, 31
-; RV32-BITS-512-NEXT: sb a0, 0(sp)
-; RV32-BITS-512-NEXT: mv a0, sp
-; RV32-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BITS-512-NEXT: vle8.v v8, (a0)
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: vfirst.m a0, v0
+; RV32-BITS-512-NEXT: seqz a0, a0
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-512-NEXT: vand.vi v8, v8, 1
; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-512-NEXT: addi sp, sp, 16
; RV32-BITS-512-NEXT: ret
;
; RV64-BITS-UNKNOWN-LABEL: reverse_v16i1:
; RV64-BITS-UNKNOWN: # %bb.0:
-; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -16
-; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16
-; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
-; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0
-; RV64-BITS-UNKNOWN-NEXT: sb a0, 15(sp)
; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 48
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61
+; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 12(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 8(sp)
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 7(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 6(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 5(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 4(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 3(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 2(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61
; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 1(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 48
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 62
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a0, 0(sp)
-; RV64-BITS-UNKNOWN-NEXT: mv a0, sp
-; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a0)
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 16
; RV64-BITS-UNKNOWN-NEXT: ret
;
; RV64-BITS-256-LABEL: reverse_v16i1:
; RV64-BITS-256: # %bb.0:
-; RV64-BITS-256-NEXT: addi sp, sp, -16
-; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16
-; RV64-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-256-NEXT: vfirst.m a0, v0
-; RV64-BITS-256-NEXT: seqz a0, a0
-; RV64-BITS-256-NEXT: sb a0, 15(sp)
; RV64-BITS-256-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV64-BITS-256-NEXT: vmv.x.s a0, v0
-; RV64-BITS-256-NEXT: slli a1, a0, 62
+; RV64-BITS-256-NEXT: slli a1, a0, 48
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 14(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 61
+; RV64-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 49
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 13(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 60
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 50
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 12(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 59
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 51
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 11(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 58
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 52
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 10(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 57
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 53
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 9(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 56
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 54
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 8(sp)
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
; RV64-BITS-256-NEXT: slli a1, a0, 55
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 7(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 54
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 56
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 6(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 53
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 57
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 5(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 52
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 58
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 4(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 51
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 59
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 3(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 50
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 60
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 2(sp)
-; RV64-BITS-256-NEXT: slli a1, a0, 49
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 61
; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 1(sp)
-; RV64-BITS-256-NEXT: slli a0, a0, 48
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a0, a0, 62
; RV64-BITS-256-NEXT: srli a0, a0, 63
-; RV64-BITS-256-NEXT: sb a0, 0(sp)
-; RV64-BITS-256-NEXT: mv a0, sp
-; RV64-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-256-NEXT: vle8.v v8, (a0)
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-256-NEXT: vfirst.m a0, v0
+; RV64-BITS-256-NEXT: seqz a0, a0
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-256-NEXT: vand.vi v8, v8, 1
; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-256-NEXT: addi sp, sp, 16
; RV64-BITS-256-NEXT: ret
;
; RV64-BITS-512-LABEL: reverse_v16i1:
; RV64-BITS-512: # %bb.0:
-; RV64-BITS-512-NEXT: addi sp, sp, -16
-; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16
-; RV64-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-512-NEXT: vfirst.m a0, v0
-; RV64-BITS-512-NEXT: seqz a0, a0
-; RV64-BITS-512-NEXT: sb a0, 15(sp)
; RV64-BITS-512-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV64-BITS-512-NEXT: vmv.x.s a0, v0
-; RV64-BITS-512-NEXT: slli a1, a0, 62
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 14(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 61
+; RV64-BITS-512-NEXT: slli a1, a0, 48
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 13(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 60
+; RV64-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 49
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 12(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 59
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 50
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 11(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 58
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 51
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 10(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 57
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 52
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 9(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 56
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 53
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 54
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 8(sp)
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
; RV64-BITS-512-NEXT: slli a1, a0, 55
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 7(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 54
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 56
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 6(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 53
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 57
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 5(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 52
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 58
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 4(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 51
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 59
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 3(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 50
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 60
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 2(sp)
-; RV64-BITS-512-NEXT: slli a1, a0, 49
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 61
; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 1(sp)
-; RV64-BITS-512-NEXT: slli a0, a0, 48
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a0, a0, 62
; RV64-BITS-512-NEXT: srli a0, a0, 63
-; RV64-BITS-512-NEXT: sb a0, 0(sp)
-; RV64-BITS-512-NEXT: mv a0, sp
-; RV64-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BITS-512-NEXT: vle8.v v8, (a0)
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-512-NEXT: vfirst.m a0, v0
+; RV64-BITS-512-NEXT: seqz a0, a0
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-512-NEXT: vand.vi v8, v8, 1
; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-512-NEXT: addi sp, sp, 16
; RV64-BITS-512-NEXT: ret
%res = call <16 x i1> @llvm.experimental.vector.reverse.v16i1(<16 x i1> %a)
ret <16 x i1> %res
@@ -673,728 +607,632 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
; RV32-BITS-UNKNOWN-LABEL: reverse_v32i1:
; RV32-BITS-UNKNOWN: # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -64
-; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 64
-; RV32-BITS-UNKNOWN-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32-BITS-UNKNOWN-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32-BITS-UNKNOWN-NEXT: .cfi_offset ra, -4
-; RV32-BITS-UNKNOWN-NEXT: .cfi_offset s0, -8
-; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 64
-; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
-; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -32
-; RV32-BITS-UNKNOWN-NEXT: li a0, 32
-; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vfirst.m a1, v0
-; RV32-BITS-UNKNOWN-NEXT: seqz a1, a1
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 31(sp)
; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 0(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 30(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 29(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 28(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 27(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 26(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 25(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 24(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 23(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 22(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 21(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 20(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 19(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 18(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 17(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 16
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 16(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 15(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 13
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 12(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 8(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 7(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 6(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 5(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 4(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 3(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 2(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 1
+; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: li a2, 32
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 1
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 4
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 5
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 6
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 7
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 8
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 9
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 10
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 11
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 12
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 13
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 14
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 15
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 16
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28
; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 1(sp)
-; RV32-BITS-UNKNOWN-NEXT: mv a1, sp
-; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a1)
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 30
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-UNKNOWN-NEXT: addi sp, s0, -64
-; RV32-BITS-UNKNOWN-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32-BITS-UNKNOWN-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 64
; RV32-BITS-UNKNOWN-NEXT: ret
;
; RV32-BITS-256-LABEL: reverse_v32i1:
; RV32-BITS-256: # %bb.0:
-; RV32-BITS-256-NEXT: addi sp, sp, -64
-; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 64
-; RV32-BITS-256-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32-BITS-256-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32-BITS-256-NEXT: .cfi_offset ra, -4
-; RV32-BITS-256-NEXT: .cfi_offset s0, -8
-; RV32-BITS-256-NEXT: addi s0, sp, 64
-; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0
-; RV32-BITS-256-NEXT: andi sp, sp, -32
-; RV32-BITS-256-NEXT: li a0, 32
-; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-256-NEXT: vfirst.m a1, v0
-; RV32-BITS-256-NEXT: seqz a1, a1
-; RV32-BITS-256-NEXT: sb a1, 31(sp)
; RV32-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-256-NEXT: vmv.x.s a1, v0
-; RV32-BITS-256-NEXT: srli a2, a1, 31
-; RV32-BITS-256-NEXT: sb a2, 0(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 30
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 30(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 29
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 29(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 28
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 28(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 27
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 27(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 26
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 26(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 25
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 25(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 24
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 24(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 23
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 23(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 22
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 22(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 21
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 21(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 20
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 20(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 19
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 19(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 18
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 18(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 17
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 17(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 16
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 16(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 15
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 15(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 14
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 14(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 13
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 13(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 12
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 12(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 11
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 11(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 10
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 10(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 9
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 9(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 8
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 8(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 7
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 7(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 6
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 6(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 5
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 5(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 4
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 4(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 3
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 3(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 2
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 2(sp)
-; RV32-BITS-256-NEXT: slli a1, a1, 1
+; RV32-BITS-256-NEXT: vmv.x.s a0, v0
+; RV32-BITS-256-NEXT: srli a1, a0, 31
+; RV32-BITS-256-NEXT: li a2, 32
+; RV32-BITS-256-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 1
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 2
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 3
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 4
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 5
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 6
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 7
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 8
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 9
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 10
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 11
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 12
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 13
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 14
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 15
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 16
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 17
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 18
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 19
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 20
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 21
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 22
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 23
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 24
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 25
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 26
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 27
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 28
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a1, a0, 29
; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 1(sp)
-; RV32-BITS-256-NEXT: mv a1, sp
-; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-256-NEXT: vle8.v v8, (a1)
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: slli a0, a0, 30
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: vfirst.m a0, v0
+; RV32-BITS-256-NEXT: seqz a0, a0
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-256-NEXT: vand.vi v8, v8, 1
; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-256-NEXT: addi sp, s0, -64
-; RV32-BITS-256-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32-BITS-256-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32-BITS-256-NEXT: addi sp, sp, 64
; RV32-BITS-256-NEXT: ret
;
; RV32-BITS-512-LABEL: reverse_v32i1:
; RV32-BITS-512: # %bb.0:
-; RV32-BITS-512-NEXT: addi sp, sp, -64
-; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 64
-; RV32-BITS-512-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32-BITS-512-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32-BITS-512-NEXT: .cfi_offset ra, -4
-; RV32-BITS-512-NEXT: .cfi_offset s0, -8
-; RV32-BITS-512-NEXT: addi s0, sp, 64
-; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0
-; RV32-BITS-512-NEXT: andi sp, sp, -32
-; RV32-BITS-512-NEXT: li a0, 32
-; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-512-NEXT: vfirst.m a1, v0
-; RV32-BITS-512-NEXT: seqz a1, a1
-; RV32-BITS-512-NEXT: sb a1, 31(sp)
; RV32-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-BITS-512-NEXT: vmv.x.s a1, v0
-; RV32-BITS-512-NEXT: srli a2, a1, 31
-; RV32-BITS-512-NEXT: sb a2, 0(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 30
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 30(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 29
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 29(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 28
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 28(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 27
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 27(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 26
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 26(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 25
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 25(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 24
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 24(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 23
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 23(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 22
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 22(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 21
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 21(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 20
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 20(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 19
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 19(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 18
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 18(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 17
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 17(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 16
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 16(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 15
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 15(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 14
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 14(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 13
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 13(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 12
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 12(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 11
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 11(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 10
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 10(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 9
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 9(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 8
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 8(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 7
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 7(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 6
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 6(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 5
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 5(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 4
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 4(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 3
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 3(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 2
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 2(sp)
-; RV32-BITS-512-NEXT: slli a1, a1, 1
+; RV32-BITS-512-NEXT: vmv.x.s a0, v0
+; RV32-BITS-512-NEXT: srli a1, a0, 31
+; RV32-BITS-512-NEXT: li a2, 32
+; RV32-BITS-512-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 1
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 2
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 3
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 4
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 5
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 6
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 7
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 8
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 9
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 10
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 11
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 12
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 13
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 14
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 15
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 16
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 17
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 18
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 19
; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 1(sp)
-; RV32-BITS-512-NEXT: mv a1, sp
-; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV32-BITS-512-NEXT: vle8.v v8, (a1)
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 20
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 21
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 22
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 23
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 24
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 25
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 26
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 27
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 28
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a1, a0, 29
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: slli a0, a0, 30
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: vfirst.m a0, v0
+; RV32-BITS-512-NEXT: seqz a0, a0
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-512-NEXT: vand.vi v8, v8, 1
; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-512-NEXT: addi sp, s0, -64
-; RV32-BITS-512-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32-BITS-512-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32-BITS-512-NEXT: addi sp, sp, 64
; RV32-BITS-512-NEXT: ret
;
; RV64-BITS-UNKNOWN-LABEL: reverse_v32i1:
; RV64-BITS-UNKNOWN: # %bb.0:
-; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -64
-; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 64
-; RV64-BITS-UNKNOWN-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-BITS-UNKNOWN-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-BITS-UNKNOWN-NEXT: .cfi_offset ra, -8
-; RV64-BITS-UNKNOWN-NEXT: .cfi_offset s0, -16
-; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 64
-; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
-; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -32
-; RV64-BITS-UNKNOWN-NEXT: li a0, 32
-; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vfirst.m a1, v0
-; RV64-BITS-UNKNOWN-NEXT: seqz a1, a1
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 31(sp)
; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0
-; RV64-BITS-UNKNOWN-NEXT: srliw a2, a1, 31
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 0(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 62
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 30(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 61
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 29(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 60
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 28(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 59
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 27(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 58
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 26(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 57
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 25(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 56
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 24(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 55
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 23(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 54
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 22(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 53
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 21(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 52
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 20(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 51
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 19(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 50
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 18(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 49
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 17(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 48
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 16(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 47
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 15(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 46
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 45
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 44
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 12(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 43
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 42
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 41
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 40
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 8(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 39
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 7(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 38
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 6(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 37
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 5(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 36
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 4(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 35
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 3(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 34
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 2(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a1, 33
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 1(sp)
-; RV64-BITS-UNKNOWN-NEXT: mv a1, sp
-; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a1)
+; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
+; RV64-BITS-UNKNOWN-NEXT: srliw a1, a0, 31
+; RV64-BITS-UNKNOWN-NEXT: li a2, 32
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 33
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 34
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 35
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 36
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 37
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 38
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 39
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 40
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 41
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 42
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 43
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 44
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 45
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 46
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 47
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 48
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 62
+; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-UNKNOWN-NEXT: addi sp, s0, -64
-; RV64-BITS-UNKNOWN-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-BITS-UNKNOWN-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 64
; RV64-BITS-UNKNOWN-NEXT: ret
;
; RV64-BITS-256-LABEL: reverse_v32i1:
; RV64-BITS-256: # %bb.0:
-; RV64-BITS-256-NEXT: addi sp, sp, -64
-; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 64
-; RV64-BITS-256-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-BITS-256-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-BITS-256-NEXT: .cfi_offset ra, -8
-; RV64-BITS-256-NEXT: .cfi_offset s0, -16
-; RV64-BITS-256-NEXT: addi s0, sp, 64
-; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0
-; RV64-BITS-256-NEXT: andi sp, sp, -32
-; RV64-BITS-256-NEXT: li a0, 32
-; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-256-NEXT: vfirst.m a1, v0
-; RV64-BITS-256-NEXT: seqz a1, a1
-; RV64-BITS-256-NEXT: sb a1, 31(sp)
; RV64-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-256-NEXT: vmv.x.s a1, v0
-; RV64-BITS-256-NEXT: srliw a2, a1, 31
-; RV64-BITS-256-NEXT: sb a2, 0(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 62
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 30(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 61
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 29(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 60
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 28(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 59
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 27(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 58
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 26(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 57
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 25(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 56
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 24(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 55
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 23(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 54
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 22(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 53
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 21(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 52
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 20(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 51
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 19(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 50
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 18(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 49
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 17(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 48
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 16(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 47
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 15(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 46
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 14(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 45
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 13(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 44
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 12(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 43
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 11(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 42
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 10(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 41
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 9(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 40
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 8(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 39
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 7(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 38
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 6(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 37
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 5(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 36
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 4(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 35
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 3(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 34
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 2(sp)
-; RV64-BITS-256-NEXT: slli a1, a1, 33
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 1(sp)
-; RV64-BITS-256-NEXT: mv a1, sp
-; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-256-NEXT: vle8.v v8, (a1)
+; RV64-BITS-256-NEXT: vmv.x.s a0, v0
+; RV64-BITS-256-NEXT: srliw a1, a0, 31
+; RV64-BITS-256-NEXT: li a2, 32
+; RV64-BITS-256-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 33
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 34
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 35
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 36
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 37
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 38
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 39
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 40
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 41
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 42
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 43
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 44
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 45
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 46
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 47
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 48
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 49
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 50
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 51
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 52
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 53
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 54
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 55
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 56
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 57
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 58
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 59
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 60
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 61
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a0, a0, 62
+; RV64-BITS-256-NEXT: srli a0, a0, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-256-NEXT: vfirst.m a0, v0
+; RV64-BITS-256-NEXT: seqz a0, a0
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-256-NEXT: vand.vi v8, v8, 1
; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-256-NEXT: addi sp, s0, -64
-; RV64-BITS-256-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-BITS-256-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-BITS-256-NEXT: addi sp, sp, 64
; RV64-BITS-256-NEXT: ret
;
; RV64-BITS-512-LABEL: reverse_v32i1:
; RV64-BITS-512: # %bb.0:
-; RV64-BITS-512-NEXT: addi sp, sp, -64
-; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 64
-; RV64-BITS-512-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-BITS-512-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-BITS-512-NEXT: .cfi_offset ra, -8
-; RV64-BITS-512-NEXT: .cfi_offset s0, -16
-; RV64-BITS-512-NEXT: addi s0, sp, 64
-; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0
-; RV64-BITS-512-NEXT: andi sp, sp, -32
-; RV64-BITS-512-NEXT: li a0, 32
-; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-512-NEXT: vfirst.m a1, v0
-; RV64-BITS-512-NEXT: seqz a1, a1
-; RV64-BITS-512-NEXT: sb a1, 31(sp)
; RV64-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; RV64-BITS-512-NEXT: vmv.x.s a1, v0
-; RV64-BITS-512-NEXT: srliw a2, a1, 31
-; RV64-BITS-512-NEXT: sb a2, 0(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 62
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 30(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 61
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 29(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 60
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 28(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 59
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 27(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 58
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 26(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 57
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 25(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 56
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 24(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 55
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 23(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 54
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 22(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 53
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 21(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 52
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 20(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 51
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 19(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 50
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 18(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 49
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 17(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 48
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 16(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 47
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 15(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 46
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 14(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 45
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 13(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 44
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 12(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 43
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 11(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 42
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 10(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 41
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 9(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 40
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 8(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 39
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 7(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 38
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 6(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 37
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 5(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 36
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 4(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 35
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 3(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 34
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 2(sp)
-; RV64-BITS-512-NEXT: slli a1, a1, 33
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 1(sp)
-; RV64-BITS-512-NEXT: mv a1, sp
-; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV64-BITS-512-NEXT: vle8.v v8, (a1)
+; RV64-BITS-512-NEXT: vmv.x.s a0, v0
+; RV64-BITS-512-NEXT: srliw a1, a0, 31
+; RV64-BITS-512-NEXT: li a2, 32
+; RV64-BITS-512-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 33
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 34
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 35
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 36
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 37
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 38
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 39
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 40
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 41
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 42
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 43
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 44
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 45
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 46
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 47
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 48
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 49
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 50
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 51
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 52
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 53
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 54
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 55
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 56
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 57
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 58
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 59
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 60
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 61
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a0, a0, 62
+; RV64-BITS-512-NEXT: srli a0, a0, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-512-NEXT: vfirst.m a0, v0
+; RV64-BITS-512-NEXT: seqz a0, a0
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-512-NEXT: vand.vi v8, v8, 1
; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-512-NEXT: addi sp, s0, -64
-; RV64-BITS-512-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-BITS-512-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-BITS-512-NEXT: addi sp, sp, 64
; RV64-BITS-512-NEXT: ret
%res = call <32 x i1> @llvm.experimental.vector.reverse.v32i1(<32 x i1> %a)
ret <32 x i1> %res
@@ -1403,1301 +1241,1211 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; RV32-BITS-UNKNOWN-LABEL: reverse_v64i1:
; RV32-BITS-UNKNOWN: # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -128
-; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 128
-; RV32-BITS-UNKNOWN-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
-; RV32-BITS-UNKNOWN-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
-; RV32-BITS-UNKNOWN-NEXT: .cfi_offset ra, -4
-; RV32-BITS-UNKNOWN-NEXT: .cfi_offset s0, -8
-; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 128
-; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
-; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -64
-; RV32-BITS-UNKNOWN-NEXT: li a0, 64
-; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vfirst.m a1, v0
-; RV32-BITS-UNKNOWN-NEXT: seqz a1, a1
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 63(sp)
; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0
+; RV32-BITS-UNKNOWN-NEXT: vslidedown.vi v8, v0, 1
+; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v8
; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 32(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30
+; RV32-BITS-UNKNOWN-NEXT: li a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 1
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 62(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 61(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 60(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 59(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 58(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 57(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 56(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 55(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 54(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 53(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 52(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 51(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 50(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 49(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 16
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 48(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 47(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 46(sp)
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 13
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 45(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 44(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 43(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 42(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 41(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 40(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 39(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 38(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 37(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 36(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 35(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 34(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 1
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 33(sp)
-; RV32-BITS-UNKNOWN-NEXT: vslidedown.vi v8, v0, 1
-; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v8
-; RV32-BITS-UNKNOWN-NEXT: andi a2, a1, 1
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 31(sp)
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 0(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 30(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 29(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 28(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 27(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 26(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 25(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 24(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 23(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 22(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 21(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 20(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19
-; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 19(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 18(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 17(sp)
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 16
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 16(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 15(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 14(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 13
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 13(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 12(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 11(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 10(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 9(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 8(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 7(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 6(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 5(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 4(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 3(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30
; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a2, 2(sp)
-; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 1
-; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
-; RV32-BITS-UNKNOWN-NEXT: sb a1, 1(sp)
-; RV32-BITS-UNKNOWN-NEXT: mv a1, sp
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: andi a1, a1, 1
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0
+; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31
; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a1)
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 1
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 2
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 3
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 4
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 5
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 6
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 7
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 8
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 9
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 10
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 11
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 12
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 13
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 14
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 15
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 16
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 17
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 18
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 19
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 20
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 21
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 22
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 23
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 24
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 25
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 26
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 27
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 28
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 29
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 30
+; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-UNKNOWN-NEXT: addi sp, s0, -128
-; RV32-BITS-UNKNOWN-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
-; RV32-BITS-UNKNOWN-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
-; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 128
; RV32-BITS-UNKNOWN-NEXT: ret
;
; RV32-BITS-256-LABEL: reverse_v64i1:
; RV32-BITS-256: # %bb.0:
-; RV32-BITS-256-NEXT: addi sp, sp, -128
-; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 128
-; RV32-BITS-256-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
-; RV32-BITS-256-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
-; RV32-BITS-256-NEXT: .cfi_offset ra, -4
-; RV32-BITS-256-NEXT: .cfi_offset s0, -8
-; RV32-BITS-256-NEXT: addi s0, sp, 128
-; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0
-; RV32-BITS-256-NEXT: andi sp, sp, -64
-; RV32-BITS-256-NEXT: li a0, 64
-; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-256-NEXT: vfirst.m a1, v0
-; RV32-BITS-256-NEXT: seqz a1, a1
-; RV32-BITS-256-NEXT: sb a1, 63(sp)
; RV32-BITS-256-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-256-NEXT: vmv.x.s a1, v0
-; RV32-BITS-256-NEXT: srli a2, a1, 31
-; RV32-BITS-256-NEXT: sb a2, 32(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 30
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 62(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 29
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 61(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 28
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 60(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 27
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 59(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 26
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 58(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 25
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 57(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 24
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 56(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 23
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 55(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 22
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 54(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 21
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 53(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 20
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 52(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 19
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 51(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 18
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 50(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 17
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 49(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 16
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 48(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 15
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 47(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 14
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 46(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 13
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 45(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 12
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 44(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 11
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 43(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 10
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 42(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 9
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 41(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 8
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 40(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 7
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 39(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 6
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 38(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 5
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 37(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 4
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 36(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 3
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 35(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 2
-; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 34(sp)
-; RV32-BITS-256-NEXT: slli a1, a1, 1
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 33(sp)
; RV32-BITS-256-NEXT: vslidedown.vi v8, v0, 1
; RV32-BITS-256-NEXT: vmv.x.s a1, v8
-; RV32-BITS-256-NEXT: andi a2, a1, 1
-; RV32-BITS-256-NEXT: sb a2, 31(sp)
; RV32-BITS-256-NEXT: srli a2, a1, 31
-; RV32-BITS-256-NEXT: sb a2, 0(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 30
+; RV32-BITS-256-NEXT: li a0, 64
+; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 1
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 30(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 29
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 2
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 29(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 28
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 3
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 28(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 27
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 4
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 27(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 26
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 5
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 26(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 25
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 6
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 25(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 24
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 7
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 24(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 23
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 8
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 23(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 22
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 9
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 22(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 21
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 10
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 21(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 20
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 11
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 20(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 19
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 12
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 19(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 18
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 13
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 18(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 17
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 14
+; RV32-BITS-256-NEXT: srli a2, a2, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 15
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 17(sp)
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
; RV32-BITS-256-NEXT: slli a2, a1, 16
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 16(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 15
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 17
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 15(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 14
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 18
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 14(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 13
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 19
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 13(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 12
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 20
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 12(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 11
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 21
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 11(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 10
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 22
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 10(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 9
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 23
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 9(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 8
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 24
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 8(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 7
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 25
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 7(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 6
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 26
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 6(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 5
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 27
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 5(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 4
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 28
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 4(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 3
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 29
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 3(sp)
-; RV32-BITS-256-NEXT: slli a2, a1, 2
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a2, a1, 30
; RV32-BITS-256-NEXT: srli a2, a2, 31
-; RV32-BITS-256-NEXT: sb a2, 2(sp)
-; RV32-BITS-256-NEXT: slli a1, a1, 1
-; RV32-BITS-256-NEXT: srli a1, a1, 31
-; RV32-BITS-256-NEXT: sb a1, 1(sp)
-; RV32-BITS-256-NEXT: mv a1, sp
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: andi a1, a1, 1
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-256-NEXT: vmv.x.s a1, v0
+; RV32-BITS-256-NEXT: srli a2, a1, 31
; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-256-NEXT: vle8.v v8, (a1)
-; RV32-BITS-256-NEXT: vand.vi v8, v8, 1
-; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-256-NEXT: addi sp, s0, -128
-; RV32-BITS-256-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
-; RV32-BITS-256-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
-; RV32-BITS-256-NEXT: addi sp, sp, 128
-; RV32-BITS-256-NEXT: ret
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-256-NEXT: slli a0, a1, 1
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 2
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 3
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 4
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 5
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 6
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 7
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 8
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 9
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 10
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 11
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 12
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 13
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 14
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 15
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 16
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 17
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 18
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 19
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 20
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 21
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 22
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 23
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 24
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 25
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 26
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 27
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 28
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a0, a1, 29
+; RV32-BITS-256-NEXT: srli a0, a0, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: slli a1, a1, 30
+; RV32-BITS-256-NEXT: srli a1, a1, 31
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-256-NEXT: vfirst.m a0, v0
+; RV32-BITS-256-NEXT: seqz a0, a0
+; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-256-NEXT: vand.vi v8, v8, 1
+; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: ret
;
; RV32-BITS-512-LABEL: reverse_v64i1:
; RV32-BITS-512: # %bb.0:
-; RV32-BITS-512-NEXT: addi sp, sp, -128
-; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 128
-; RV32-BITS-512-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
-; RV32-BITS-512-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
-; RV32-BITS-512-NEXT: .cfi_offset ra, -4
-; RV32-BITS-512-NEXT: .cfi_offset s0, -8
-; RV32-BITS-512-NEXT: addi s0, sp, 128
-; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0
-; RV32-BITS-512-NEXT: andi sp, sp, -64
-; RV32-BITS-512-NEXT: li a0, 64
-; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-512-NEXT: vfirst.m a1, v0
-; RV32-BITS-512-NEXT: seqz a1, a1
-; RV32-BITS-512-NEXT: sb a1, 63(sp)
; RV32-BITS-512-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-BITS-512-NEXT: vmv.x.s a1, v0
-; RV32-BITS-512-NEXT: srli a2, a1, 31
-; RV32-BITS-512-NEXT: sb a2, 32(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 30
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 62(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 29
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 61(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 28
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 60(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 27
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 59(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 26
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 58(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 25
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 57(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 24
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 56(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 23
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 55(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 22
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 54(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 21
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 53(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 20
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 52(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 19
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 51(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 18
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 50(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 17
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 49(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 16
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 48(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 15
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 47(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 14
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 46(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 13
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 45(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 12
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 44(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 11
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 43(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 10
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 42(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 9
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 41(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 8
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 40(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 7
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 39(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 6
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 38(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 5
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 37(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 4
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 36(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 3
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 35(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 2
-; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 34(sp)
-; RV32-BITS-512-NEXT: slli a1, a1, 1
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 33(sp)
; RV32-BITS-512-NEXT: vslidedown.vi v8, v0, 1
; RV32-BITS-512-NEXT: vmv.x.s a1, v8
-; RV32-BITS-512-NEXT: andi a2, a1, 1
-; RV32-BITS-512-NEXT: sb a2, 31(sp)
; RV32-BITS-512-NEXT: srli a2, a1, 31
-; RV32-BITS-512-NEXT: sb a2, 0(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 30
+; RV32-BITS-512-NEXT: li a0, 64
+; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 1
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 30(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 29
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 2
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 29(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 28
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 3
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 28(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 27
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 4
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 27(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 26
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 5
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 26(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 25
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 6
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 25(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 24
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 7
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 24(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 23
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 8
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 23(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 22
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 9
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 22(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 21
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 10
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 21(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 20
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 11
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 20(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 19
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 12
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 19(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 18
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 13
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 18(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 17
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 14
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 17(sp)
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 15
+; RV32-BITS-512-NEXT: srli a2, a2, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
; RV32-BITS-512-NEXT: slli a2, a1, 16
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 16(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 15
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 17
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 15(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 14
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 18
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 14(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 13
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 19
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 13(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 12
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 20
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 12(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 11
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 21
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 11(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 10
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 22
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 10(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 9
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 23
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 9(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 8
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 24
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 8(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 7
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 25
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 7(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 6
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 26
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 6(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 5
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 27
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 5(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 4
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 28
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 4(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 3
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 29
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 3(sp)
-; RV32-BITS-512-NEXT: slli a2, a1, 2
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a2, a1, 30
; RV32-BITS-512-NEXT: srli a2, a2, 31
-; RV32-BITS-512-NEXT: sb a2, 2(sp)
-; RV32-BITS-512-NEXT: slli a1, a1, 1
-; RV32-BITS-512-NEXT: srli a1, a1, 31
-; RV32-BITS-512-NEXT: sb a1, 1(sp)
-; RV32-BITS-512-NEXT: mv a1, sp
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: andi a1, a1, 1
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-512-NEXT: vmv.x.s a1, v0
+; RV32-BITS-512-NEXT: srli a2, a1, 31
; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV32-BITS-512-NEXT: vle8.v v8, (a1)
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2
+; RV32-BITS-512-NEXT: slli a0, a1, 1
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 2
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 3
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 4
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 5
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 6
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 7
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 8
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 9
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 10
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 11
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 12
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 13
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 14
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 15
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 16
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 17
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 18
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 19
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 20
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 21
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 22
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 23
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 24
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 25
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 26
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 27
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 28
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a0, a1, 29
+; RV32-BITS-512-NEXT: srli a0, a0, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV32-BITS-512-NEXT: slli a1, a1, 30
+; RV32-BITS-512-NEXT: srli a1, a1, 31
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV32-BITS-512-NEXT: vfirst.m a0, v0
+; RV32-BITS-512-NEXT: seqz a0, a0
+; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0
; RV32-BITS-512-NEXT: vand.vi v8, v8, 1
; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
-; RV32-BITS-512-NEXT: addi sp, s0, -128
-; RV32-BITS-512-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
-; RV32-BITS-512-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
-; RV32-BITS-512-NEXT: addi sp, sp, 128
; RV32-BITS-512-NEXT: ret
;
; RV64-BITS-UNKNOWN-LABEL: reverse_v64i1:
; RV64-BITS-UNKNOWN: # %bb.0:
-; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -128
-; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 128
-; RV64-BITS-UNKNOWN-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-BITS-UNKNOWN-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-BITS-UNKNOWN-NEXT: .cfi_offset ra, -8
-; RV64-BITS-UNKNOWN-NEXT: .cfi_offset s0, -16
-; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 128
-; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
-; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -64
-; RV64-BITS-UNKNOWN-NEXT: li a0, 64
-; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vfirst.m a1, v0
-; RV64-BITS-UNKNOWN-NEXT: seqz a1, a1
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 63(sp)
; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0
-; RV64-BITS-UNKNOWN-NEXT: srliw a2, a1, 31
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 32(sp)
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 0(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 62
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 62(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 61
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 61(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 60
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 60(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 59
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 59(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 58
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 58(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 57
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 57(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 56
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 56(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 55
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 55(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 54
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 54(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 53
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 53(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 52
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 52(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 51
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 51(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 50
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 50(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 49
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 49(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 48
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 48(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 47
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 47(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 46
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 46(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 45
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 45(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 44
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 44(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 43
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 43(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 42
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 42(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 41
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 41(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 40
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 40(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 39
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 39(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 38
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 38(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 37
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 37(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 36
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 36(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 35
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 35(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 34
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 34(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 33
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 33(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 31
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 31(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 30
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 30(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 29
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 29(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 28
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 28(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 27
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 27(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 26
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 26(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 25
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 25(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 24
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 24(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 23
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 23(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 22
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 22(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 21
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 21(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 20
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 20(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 19
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 19(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 18
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 18(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 17
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 17(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 16
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 16(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 15
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 15(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 14
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 14(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 13
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 13(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 12
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 12(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 11
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 11(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 10
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 10(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 9
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 9(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 8
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 8(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 7
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 7(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 6
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 6(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 5
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 5(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 4
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 4(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 3
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 3(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 2
-; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a2, 2(sp)
-; RV64-BITS-UNKNOWN-NEXT: slli a1, a1, 1
-; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
-; RV64-BITS-UNKNOWN-NEXT: sb a1, 1(sp)
-; RV64-BITS-UNKNOWN-NEXT: mv a1, sp
-; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a1)
+; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a0, 63
+; RV64-BITS-UNKNOWN-NEXT: li a2, 64
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 1
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 2
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 3
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 4
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 5
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 6
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 7
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 8
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 9
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 10
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 11
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 12
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 13
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 14
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 15
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 16
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 17
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 18
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 19
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 20
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 21
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 22
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 23
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 24
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 25
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 26
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 27
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 28
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 29
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 30
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 31
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: srliw a1, a0, 31
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 33
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 34
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 35
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 36
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 37
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 38
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 39
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 40
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 41
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 42
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 43
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 44
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 45
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 46
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 47
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 48
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61
+; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 62
+; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0
+; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0
+; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-UNKNOWN-NEXT: addi sp, s0, -128
-; RV64-BITS-UNKNOWN-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-BITS-UNKNOWN-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 128
; RV64-BITS-UNKNOWN-NEXT: ret
;
; RV64-BITS-256-LABEL: reverse_v64i1:
; RV64-BITS-256: # %bb.0:
-; RV64-BITS-256-NEXT: addi sp, sp, -128
-; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 128
-; RV64-BITS-256-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-BITS-256-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-BITS-256-NEXT: .cfi_offset ra, -8
-; RV64-BITS-256-NEXT: .cfi_offset s0, -16
-; RV64-BITS-256-NEXT: addi s0, sp, 128
-; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0
-; RV64-BITS-256-NEXT: andi sp, sp, -64
-; RV64-BITS-256-NEXT: li a0, 64
-; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-256-NEXT: vfirst.m a1, v0
-; RV64-BITS-256-NEXT: seqz a1, a1
-; RV64-BITS-256-NEXT: sb a1, 63(sp)
; RV64-BITS-256-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-256-NEXT: vmv.x.s a1, v0
-; RV64-BITS-256-NEXT: srliw a2, a1, 31
-; RV64-BITS-256-NEXT: sb a2, 32(sp)
-; RV64-BITS-256-NEXT: srli a2, a1, 63
-; RV64-BITS-256-NEXT: sb a2, 0(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 62
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 62(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 61
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 61(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 60
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 60(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 59
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 59(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 58
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 58(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 57
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 57(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 56
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 56(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 55
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 55(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 54
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 54(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 53
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 53(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 52
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 52(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 51
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 51(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 50
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 50(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 49
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 49(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 48
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 48(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 47
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 47(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 46
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 46(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 45
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 45(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 44
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 44(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 43
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 43(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 42
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 42(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 41
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 41(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 40
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 40(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 39
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 39(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 38
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 38(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 37
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 37(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 36
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 36(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 35
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 35(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 34
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 34(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 33
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 33(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 31
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 31(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 30
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 30(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 29
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 29(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 28
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 28(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 27
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 27(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 26
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 26(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 25
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 25(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 24
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 24(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 23
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 23(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 22
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 22(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 21
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 21(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 20
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 20(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 19
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 19(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 18
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 18(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 17
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 17(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 16
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 16(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 15
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 15(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 14
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 14(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 13
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 13(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 12
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 12(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 11
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 11(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 10
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 10(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 9
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 9(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 8
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 8(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 7
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 7(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 6
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 6(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 5
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 5(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 4
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 4(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 3
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 3(sp)
-; RV64-BITS-256-NEXT: slli a2, a1, 2
-; RV64-BITS-256-NEXT: srli a2, a2, 63
-; RV64-BITS-256-NEXT: sb a2, 2(sp)
-; RV64-BITS-256-NEXT: slli a1, a1, 1
-; RV64-BITS-256-NEXT: srli a1, a1, 63
-; RV64-BITS-256-NEXT: sb a1, 1(sp)
-; RV64-BITS-256-NEXT: mv a1, sp
-; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-256-NEXT: vle8.v v8, (a1)
+; RV64-BITS-256-NEXT: vmv.x.s a0, v0
+; RV64-BITS-256-NEXT: srli a1, a0, 63
+; RV64-BITS-256-NEXT: li a2, 64
+; RV64-BITS-256-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 1
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 2
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 3
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 4
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 5
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 6
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 7
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 8
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 9
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 10
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 11
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 12
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 13
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 14
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 15
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 16
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 17
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 18
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 19
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 20
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 21
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 22
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 23
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 24
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 25
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 26
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 27
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 28
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 29
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 30
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 31
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: srliw a1, a0, 31
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 33
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 34
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 35
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 36
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 37
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 38
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 39
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 40
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 41
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 42
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 43
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 44
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 45
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 46
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 47
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 48
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 49
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 50
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 51
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 52
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 53
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 54
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 55
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 56
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 57
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 58
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 59
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 60
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a1, a0, 61
+; RV64-BITS-256-NEXT: srli a1, a1, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-256-NEXT: slli a0, a0, 62
+; RV64-BITS-256-NEXT: srli a0, a0, 63
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-256-NEXT: vfirst.m a0, v0
+; RV64-BITS-256-NEXT: seqz a0, a0
+; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-256-NEXT: vand.vi v8, v8, 1
; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-256-NEXT: addi sp, s0, -128
-; RV64-BITS-256-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-BITS-256-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-BITS-256-NEXT: addi sp, sp, 128
; RV64-BITS-256-NEXT: ret
;
; RV64-BITS-512-LABEL: reverse_v64i1:
; RV64-BITS-512: # %bb.0:
-; RV64-BITS-512-NEXT: addi sp, sp, -128
-; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 128
-; RV64-BITS-512-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-BITS-512-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-BITS-512-NEXT: .cfi_offset ra, -8
-; RV64-BITS-512-NEXT: .cfi_offset s0, -16
-; RV64-BITS-512-NEXT: addi s0, sp, 128
-; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0
-; RV64-BITS-512-NEXT: andi sp, sp, -64
-; RV64-BITS-512-NEXT: li a0, 64
-; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-512-NEXT: vfirst.m a1, v0
-; RV64-BITS-512-NEXT: seqz a1, a1
-; RV64-BITS-512-NEXT: sb a1, 63(sp)
; RV64-BITS-512-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; RV64-BITS-512-NEXT: vmv.x.s a1, v0
-; RV64-BITS-512-NEXT: srliw a2, a1, 31
-; RV64-BITS-512-NEXT: sb a2, 32(sp)
-; RV64-BITS-512-NEXT: srli a2, a1, 63
-; RV64-BITS-512-NEXT: sb a2, 0(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 62
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 62(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 61
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 61(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 60
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 60(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 59
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 59(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 58
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 58(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 57
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 57(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 56
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 56(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 55
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 55(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 54
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 54(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 53
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 53(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 52
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 52(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 51
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 51(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 50
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 50(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 49
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 49(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 48
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 48(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 47
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 47(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 46
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 46(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 45
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 45(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 44
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 44(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 43
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 43(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 42
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 42(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 41
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 41(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 40
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 40(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 39
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 39(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 38
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 38(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 37
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 37(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 36
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 36(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 35
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 35(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 34
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 34(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 33
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 33(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 31
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 31(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 30
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 30(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 29
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 29(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 28
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 28(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 27
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 27(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 26
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 26(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 25
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 25(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 24
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 24(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 23
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 23(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 22
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 22(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 21
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 21(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 20
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 20(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 19
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 19(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 18
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 18(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 17
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 17(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 16
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 16(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 15
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 15(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 14
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 14(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 13
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 13(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 12
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 12(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 11
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 11(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 10
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 10(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 9
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 9(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 8
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 8(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 7
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 7(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 6
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 6(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 5
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 5(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 4
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 4(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 3
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 3(sp)
-; RV64-BITS-512-NEXT: slli a2, a1, 2
-; RV64-BITS-512-NEXT: srli a2, a2, 63
-; RV64-BITS-512-NEXT: sb a2, 2(sp)
-; RV64-BITS-512-NEXT: slli a1, a1, 1
-; RV64-BITS-512-NEXT: srli a1, a1, 63
-; RV64-BITS-512-NEXT: sb a1, 1(sp)
-; RV64-BITS-512-NEXT: mv a1, sp
-; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64-BITS-512-NEXT: vle8.v v8, (a1)
+; RV64-BITS-512-NEXT: vmv.x.s a0, v0
+; RV64-BITS-512-NEXT: srli a1, a0, 63
+; RV64-BITS-512-NEXT: li a2, 64
+; RV64-BITS-512-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 1
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 2
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 3
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 4
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 5
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 6
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 7
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 8
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 9
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 10
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 11
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 12
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 13
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 14
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 15
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 16
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 17
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 18
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 19
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 20
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 21
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 22
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 23
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 24
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 25
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 26
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 27
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 28
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 29
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 30
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 31
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: srliw a1, a0, 31
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 33
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 34
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 35
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 36
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 37
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 38
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 39
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 40
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 41
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 42
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 43
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 44
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 45
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 46
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 47
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 48
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 49
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 50
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 51
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 52
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 53
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 54
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 55
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 56
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 57
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 58
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 59
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 60
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a1, a0, 61
+; RV64-BITS-512-NEXT: srli a1, a1, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1
+; RV64-BITS-512-NEXT: slli a0, a0, 62
+; RV64-BITS-512-NEXT: srli a0, a0, 63
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0
+; RV64-BITS-512-NEXT: vfirst.m a0, v0
+; RV64-BITS-512-NEXT: seqz a0, a0
+; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0
; RV64-BITS-512-NEXT: vand.vi v8, v8, 1
; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
-; RV64-BITS-512-NEXT: addi sp, s0, -128
-; RV64-BITS-512-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-BITS-512-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-BITS-512-NEXT: addi sp, sp, 128
; RV64-BITS-512-NEXT: ret
%res = call <64 x i1> @llvm.experimental.vector.reverse.v64i1(<64 x i1> %a)
ret <64 x i1> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index ad5f11eb6e78..fdca1aaf6bc8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -310,11 +310,11 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
;
; RV32-LABEL: fp2si_v8f64_v8i8:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vfmv.f.s fa3, v8
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 1
+; RV32-NEXT: vfmv.f.s fa3, v12
; RV32-NEXT: lui a0, %hi(.LCPI12_0)
; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
; RV32-NEXT: lui a0, %hi(.LCPI12_1)
@@ -325,63 +325,73 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: sb a0, 8(sp)
-; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 7
-; RV32-NEXT: vfmv.f.s fa3, v12
-; RV32-NEXT: feq.d a0, fa3, fa3
-; RV32-NEXT: neg a0, a0
+; RV32-NEXT: vfmv.f.s fa3, v8
+; RV32-NEXT: feq.d a2, fa3, fa3
+; RV32-NEXT: neg a2, a2
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
-; RV32-NEXT: fcvt.w.d a2, fa3, rtz
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: vslidedown.vi v12, v8, 6
-; RV32-NEXT: vfmv.f.s fa3, v12
+; RV32-NEXT: fcvt.w.d a3, fa3, rtz
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v8, a2
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v8, 2
+; RV32-NEXT: vfmv.f.s fa3, v16
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: neg a0, a0
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: vslidedown.vi v12, v8, 5
-; RV32-NEXT: vfmv.f.s fa3, v12
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v8, 3
+; RV32-NEXT: vfmv.f.s fa3, v16
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: neg a0, a0
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: vslidedown.vi v12, v8, 4
-; RV32-NEXT: vfmv.f.s fa3, v12
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v8, 4
+; RV32-NEXT: vfmv.f.s fa3, v16
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: neg a0, a0
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: vslidedown.vi v12, v8, 3
-; RV32-NEXT: vfmv.f.s fa3, v12
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v8, 5
+; RV32-NEXT: vfmv.f.s fa3, v16
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: neg a0, a0
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: sb a0, 11(sp)
-; RV32-NEXT: vslidedown.vi v12, v8, 2
-; RV32-NEXT: vfmv.f.s fa3, v12
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v8, 6
+; RV32-NEXT: vfmv.f.s fa3, v16
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: neg a0, a0
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: sb a0, 10(sp)
-; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 7
; RV32-NEXT: vfmv.f.s fa3, v8
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: neg a0, a0
@@ -389,101 +399,105 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa5, fa5, fa4
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: sb a0, 9(sp)
-; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: vslide1down.vx v8, v12, a0
; RV32-NEXT: vse8.v v8, (a1)
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: fp2si_v8f64_v8i8:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vfmv.f.s fa3, v8
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 1
+; RV64-NEXT: vfmv.f.s fa3, v12
; RV64-NEXT: lui a0, %hi(.LCPI12_0)
; RV64-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
; RV64-NEXT: lui a0, %hi(.LCPI12_1)
; RV64-NEXT: fld fa4, %lo(.LCPI12_1)(a0)
; RV64-NEXT: feq.d a0, fa3, fa3
-; RV64-NEXT: negw a0, a0
+; RV64-NEXT: neg a0, a0
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: sb a0, 8(sp)
-; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 7
-; RV64-NEXT: vfmv.f.s fa3, v12
-; RV64-NEXT: feq.d a0, fa3, fa3
-; RV64-NEXT: negw a0, a0
+; RV64-NEXT: vfmv.f.s fa3, v8
+; RV64-NEXT: feq.d a2, fa3, fa3
+; RV64-NEXT: neg a2, a2
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
-; RV64-NEXT: fcvt.l.d a2, fa3, rtz
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: sb a0, 15(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 6
-; RV64-NEXT: vfmv.f.s fa3, v12
+; RV64-NEXT: fcvt.l.d a3, fa3, rtz
+; RV64-NEXT: and a2, a2, a3
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v8, a2
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v8, 2
+; RV64-NEXT: vfmv.f.s fa3, v16
; RV64-NEXT: feq.d a0, fa3, fa3
-; RV64-NEXT: negw a0, a0
+; RV64-NEXT: neg a0, a0
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: sb a0, 14(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 5
-; RV64-NEXT: vfmv.f.s fa3, v12
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v8, 3
+; RV64-NEXT: vfmv.f.s fa3, v16
; RV64-NEXT: feq.d a0, fa3, fa3
-; RV64-NEXT: negw a0, a0
+; RV64-NEXT: neg a0, a0
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: sb a0, 13(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 4
-; RV64-NEXT: vfmv.f.s fa3, v12
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v8, 4
+; RV64-NEXT: vfmv.f.s fa3, v16
; RV64-NEXT: feq.d a0, fa3, fa3
-; RV64-NEXT: negw a0, a0
+; RV64-NEXT: neg a0, a0
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: sb a0, 12(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 3
-; RV64-NEXT: vfmv.f.s fa3, v12
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v8, 5
+; RV64-NEXT: vfmv.f.s fa3, v16
; RV64-NEXT: feq.d a0, fa3, fa3
-; RV64-NEXT: negw a0, a0
+; RV64-NEXT: neg a0, a0
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 2
-; RV64-NEXT: vfmv.f.s fa3, v12
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v8, 6
+; RV64-NEXT: vfmv.f.s fa3, v16
; RV64-NEXT: feq.d a0, fa3, fa3
-; RV64-NEXT: negw a0, a0
+; RV64-NEXT: neg a0, a0
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 7
; RV64-NEXT: vfmv.f.s fa3, v8
; RV64-NEXT: feq.d a0, fa3, fa3
-; RV64-NEXT: negw a0, a0
+; RV64-NEXT: neg a0, a0
; RV64-NEXT: fmax.d fa5, fa3, fa5
; RV64-NEXT: fmin.d fa5, fa5, fa4
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: sb a0, 9(sp)
-; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vle8.v v8, (a0)
+; RV64-NEXT: vslide1down.vx v8, v12, a0
; RV64-NEXT: vse8.v v8, (a1)
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%a = load <8 x double>, ptr %x
%d = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> %a)
@@ -496,8 +510,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
;
; RV32-LABEL: fp2ui_v8f64_v8i8:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a0, %hi(.LCPI13_0)
@@ -507,61 +519,69 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: sb a0, 8(sp)
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v8, a0
; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 7
-; RV32-NEXT: vfmv.f.s fa4, v12
+; RV32-NEXT: vslidedown.vi v16, v8, 1
+; RV32-NEXT: vfmv.f.s fa4, v16
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: vslidedown.vi v12, v8, 6
-; RV32-NEXT: vfmv.f.s fa4, v12
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v8, 2
+; RV32-NEXT: vfmv.f.s fa4, v16
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: vslidedown.vi v12, v8, 5
-; RV32-NEXT: vfmv.f.s fa4, v12
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v8, 3
+; RV32-NEXT: vfmv.f.s fa4, v16
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: vslidedown.vi v12, v8, 4
-; RV32-NEXT: vfmv.f.s fa4, v12
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v8, 4
+; RV32-NEXT: vfmv.f.s fa4, v16
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: vslidedown.vi v12, v8, 3
-; RV32-NEXT: vfmv.f.s fa4, v12
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v8, 5
+; RV32-NEXT: vfmv.f.s fa4, v16
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: sb a0, 11(sp)
-; RV32-NEXT: vslidedown.vi v12, v8, 2
-; RV32-NEXT: vfmv.f.s fa4, v12
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v8, 6
+; RV32-NEXT: vfmv.f.s fa4, v16
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: sb a0, 10(sp)
-; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 7
; RV32-NEXT: vfmv.f.s fa4, v8
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
-; RV32-NEXT: sb a0, 9(sp)
-; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: vslide1down.vx v8, v12, a0
; RV32-NEXT: vse8.v v8, (a1)
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: fp2ui_v8f64_v8i8:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: lui a0, %hi(.LCPI13_0)
@@ -571,55 +591,65 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: sb a0, 8(sp)
+; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v8, a0
; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 7
-; RV64-NEXT: vfmv.f.s fa4, v12
+; RV64-NEXT: vslidedown.vi v16, v8, 1
+; RV64-NEXT: vfmv.f.s fa4, v16
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: sb a0, 15(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 6
-; RV64-NEXT: vfmv.f.s fa4, v12
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v8, 2
+; RV64-NEXT: vfmv.f.s fa4, v16
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: sb a0, 14(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 5
-; RV64-NEXT: vfmv.f.s fa4, v12
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v8, 3
+; RV64-NEXT: vfmv.f.s fa4, v16
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: sb a0, 13(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 4
-; RV64-NEXT: vfmv.f.s fa4, v12
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v8, 4
+; RV64-NEXT: vfmv.f.s fa4, v16
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: sb a0, 12(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 3
-; RV64-NEXT: vfmv.f.s fa4, v12
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v8, 5
+; RV64-NEXT: vfmv.f.s fa4, v16
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 2
-; RV64-NEXT: vfmv.f.s fa4, v12
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v8, 6
+; RV64-NEXT: vfmv.f.s fa4, v16
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 7
; RV64-NEXT: vfmv.f.s fa4, v8
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
-; RV64-NEXT: sb a0, 9(sp)
-; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vle8.v v8, (a0)
+; RV64-NEXT: vslide1down.vx v8, v12, a0
; RV64-NEXT: vse8.v v8, (a1)
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%a = load <8 x double>, ptr %x
%d = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index 9951864e6525..110cf81a6716 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -216,86 +216,70 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) {
define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
; LMULMAX8RV32-LABEL: si2fp_v3i7_v3f32:
; LMULMAX8RV32: # %bb.0:
-; LMULMAX8RV32-NEXT: addi sp, sp, -16
-; LMULMAX8RV32-NEXT: .cfi_def_cfa_offset 16
-; LMULMAX8RV32-NEXT: lw a1, 8(a0)
-; LMULMAX8RV32-NEXT: sb a1, 14(sp)
-; LMULMAX8RV32-NEXT: lw a1, 4(a0)
-; LMULMAX8RV32-NEXT: sb a1, 13(sp)
-; LMULMAX8RV32-NEXT: lw a0, 0(a0)
-; LMULMAX8RV32-NEXT: sb a0, 12(sp)
-; LMULMAX8RV32-NEXT: addi a0, sp, 12
+; LMULMAX8RV32-NEXT: lw a1, 0(a0)
; LMULMAX8RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32-NEXT: vle8.v v8, (a0)
+; LMULMAX8RV32-NEXT: lw a2, 4(a0)
+; LMULMAX8RV32-NEXT: lw a0, 8(a0)
+; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a1
+; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a2
+; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0
+; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV32-NEXT: vadd.vv v8, v8, v8
; LMULMAX8RV32-NEXT: vsra.vi v8, v8, 1
; LMULMAX8RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; LMULMAX8RV32-NEXT: vsext.vf2 v9, v8
; LMULMAX8RV32-NEXT: vfwcvt.f.x.v v8, v9
-; LMULMAX8RV32-NEXT: addi sp, sp, 16
; LMULMAX8RV32-NEXT: ret
;
; LMULMAX8RV64-LABEL: si2fp_v3i7_v3f32:
; LMULMAX8RV64: # %bb.0:
-; LMULMAX8RV64-NEXT: addi sp, sp, -16
-; LMULMAX8RV64-NEXT: .cfi_def_cfa_offset 16
-; LMULMAX8RV64-NEXT: ld a1, 16(a0)
-; LMULMAX8RV64-NEXT: sb a1, 14(sp)
-; LMULMAX8RV64-NEXT: ld a1, 8(a0)
-; LMULMAX8RV64-NEXT: sb a1, 13(sp)
-; LMULMAX8RV64-NEXT: ld a0, 0(a0)
-; LMULMAX8RV64-NEXT: sb a0, 12(sp)
-; LMULMAX8RV64-NEXT: addi a0, sp, 12
+; LMULMAX8RV64-NEXT: ld a1, 0(a0)
; LMULMAX8RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64-NEXT: vle8.v v8, (a0)
+; LMULMAX8RV64-NEXT: ld a2, 8(a0)
+; LMULMAX8RV64-NEXT: ld a0, 16(a0)
+; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a1
+; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a2
+; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0
+; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV64-NEXT: vadd.vv v8, v8, v8
; LMULMAX8RV64-NEXT: vsra.vi v8, v8, 1
; LMULMAX8RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; LMULMAX8RV64-NEXT: vsext.vf2 v9, v8
; LMULMAX8RV64-NEXT: vfwcvt.f.x.v v8, v9
-; LMULMAX8RV64-NEXT: addi sp, sp, 16
; LMULMAX8RV64-NEXT: ret
;
; LMULMAX1RV32-LABEL: si2fp_v3i7_v3f32:
; LMULMAX1RV32: # %bb.0:
-; LMULMAX1RV32-NEXT: addi sp, sp, -16
-; LMULMAX1RV32-NEXT: .cfi_def_cfa_offset 16
-; LMULMAX1RV32-NEXT: lw a1, 8(a0)
-; LMULMAX1RV32-NEXT: sb a1, 14(sp)
-; LMULMAX1RV32-NEXT: lw a1, 4(a0)
-; LMULMAX1RV32-NEXT: sb a1, 13(sp)
-; LMULMAX1RV32-NEXT: lw a0, 0(a0)
-; LMULMAX1RV32-NEXT: sb a0, 12(sp)
-; LMULMAX1RV32-NEXT: addi a0, sp, 12
+; LMULMAX1RV32-NEXT: lw a1, 0(a0)
; LMULMAX1RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV32-NEXT: vle8.v v8, (a0)
+; LMULMAX1RV32-NEXT: lw a2, 4(a0)
+; LMULMAX1RV32-NEXT: lw a0, 8(a0)
+; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a1
+; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a2
+; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0
+; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX1RV32-NEXT: vadd.vv v8, v8, v8
; LMULMAX1RV32-NEXT: vsra.vi v8, v8, 1
; LMULMAX1RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; LMULMAX1RV32-NEXT: vsext.vf2 v9, v8
; LMULMAX1RV32-NEXT: vfwcvt.f.x.v v8, v9
-; LMULMAX1RV32-NEXT: addi sp, sp, 16
; LMULMAX1RV32-NEXT: ret
;
; LMULMAX1RV64-LABEL: si2fp_v3i7_v3f32:
; LMULMAX1RV64: # %bb.0:
-; LMULMAX1RV64-NEXT: addi sp, sp, -16
-; LMULMAX1RV64-NEXT: .cfi_def_cfa_offset 16
-; LMULMAX1RV64-NEXT: ld a1, 16(a0)
-; LMULMAX1RV64-NEXT: sb a1, 14(sp)
-; LMULMAX1RV64-NEXT: ld a1, 8(a0)
-; LMULMAX1RV64-NEXT: sb a1, 13(sp)
-; LMULMAX1RV64-NEXT: ld a0, 0(a0)
-; LMULMAX1RV64-NEXT: sb a0, 12(sp)
-; LMULMAX1RV64-NEXT: addi a0, sp, 12
+; LMULMAX1RV64-NEXT: ld a1, 0(a0)
; LMULMAX1RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV64-NEXT: vle8.v v8, (a0)
+; LMULMAX1RV64-NEXT: ld a2, 8(a0)
+; LMULMAX1RV64-NEXT: ld a0, 16(a0)
+; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a1
+; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a2
+; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0
+; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX1RV64-NEXT: vadd.vv v8, v8, v8
; LMULMAX1RV64-NEXT: vsra.vi v8, v8, 1
; LMULMAX1RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; LMULMAX1RV64-NEXT: vsext.vf2 v9, v8
; LMULMAX1RV64-NEXT: vfwcvt.f.x.v v8, v9
-; LMULMAX1RV64-NEXT: addi sp, sp, 16
; LMULMAX1RV64-NEXT: ret
%z = sitofp <3 x i7> %x to <3 x float>
ret <3 x float> %z
@@ -305,86 +289,70 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
; LMULMAX8RV32-LABEL: ui2fp_v3i7_v3f32:
; LMULMAX8RV32: # %bb.0:
-; LMULMAX8RV32-NEXT: addi sp, sp, -16
-; LMULMAX8RV32-NEXT: .cfi_def_cfa_offset 16
-; LMULMAX8RV32-NEXT: lw a1, 8(a0)
-; LMULMAX8RV32-NEXT: sb a1, 14(sp)
-; LMULMAX8RV32-NEXT: lw a1, 4(a0)
-; LMULMAX8RV32-NEXT: sb a1, 13(sp)
-; LMULMAX8RV32-NEXT: lw a0, 0(a0)
-; LMULMAX8RV32-NEXT: sb a0, 12(sp)
-; LMULMAX8RV32-NEXT: addi a0, sp, 12
+; LMULMAX8RV32-NEXT: lw a1, 0(a0)
; LMULMAX8RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32-NEXT: vle8.v v8, (a0)
+; LMULMAX8RV32-NEXT: lw a2, 4(a0)
+; LMULMAX8RV32-NEXT: lw a0, 8(a0)
+; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a1
+; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a2
+; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0
+; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV32-NEXT: li a0, 127
; LMULMAX8RV32-NEXT: vand.vx v8, v8, a0
; LMULMAX8RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; LMULMAX8RV32-NEXT: vzext.vf2 v9, v8
; LMULMAX8RV32-NEXT: vfwcvt.f.xu.v v8, v9
-; LMULMAX8RV32-NEXT: addi sp, sp, 16
; LMULMAX8RV32-NEXT: ret
;
; LMULMAX8RV64-LABEL: ui2fp_v3i7_v3f32:
; LMULMAX8RV64: # %bb.0:
-; LMULMAX8RV64-NEXT: addi sp, sp, -16
-; LMULMAX8RV64-NEXT: .cfi_def_cfa_offset 16
-; LMULMAX8RV64-NEXT: ld a1, 16(a0)
-; LMULMAX8RV64-NEXT: sb a1, 14(sp)
-; LMULMAX8RV64-NEXT: ld a1, 8(a0)
-; LMULMAX8RV64-NEXT: sb a1, 13(sp)
-; LMULMAX8RV64-NEXT: ld a0, 0(a0)
-; LMULMAX8RV64-NEXT: sb a0, 12(sp)
-; LMULMAX8RV64-NEXT: addi a0, sp, 12
+; LMULMAX8RV64-NEXT: ld a1, 0(a0)
; LMULMAX8RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64-NEXT: vle8.v v8, (a0)
+; LMULMAX8RV64-NEXT: ld a2, 8(a0)
+; LMULMAX8RV64-NEXT: ld a0, 16(a0)
+; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a1
+; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a2
+; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0
+; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV64-NEXT: li a0, 127
; LMULMAX8RV64-NEXT: vand.vx v8, v8, a0
; LMULMAX8RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; LMULMAX8RV64-NEXT: vzext.vf2 v9, v8
; LMULMAX8RV64-NEXT: vfwcvt.f.xu.v v8, v9
-; LMULMAX8RV64-NEXT: addi sp, sp, 16
; LMULMAX8RV64-NEXT: ret
;
; LMULMAX1RV32-LABEL: ui2fp_v3i7_v3f32:
; LMULMAX1RV32: # %bb.0:
-; LMULMAX1RV32-NEXT: addi sp, sp, -16
-; LMULMAX1RV32-NEXT: .cfi_def_cfa_offset 16
-; LMULMAX1RV32-NEXT: lw a1, 8(a0)
-; LMULMAX1RV32-NEXT: sb a1, 14(sp)
-; LMULMAX1RV32-NEXT: lw a1, 4(a0)
-; LMULMAX1RV32-NEXT: sb a1, 13(sp)
-; LMULMAX1RV32-NEXT: lw a0, 0(a0)
-; LMULMAX1RV32-NEXT: sb a0, 12(sp)
-; LMULMAX1RV32-NEXT: addi a0, sp, 12
+; LMULMAX1RV32-NEXT: lw a1, 0(a0)
; LMULMAX1RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV32-NEXT: vle8.v v8, (a0)
+; LMULMAX1RV32-NEXT: lw a2, 4(a0)
+; LMULMAX1RV32-NEXT: lw a0, 8(a0)
+; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a1
+; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a2
+; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0
+; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX1RV32-NEXT: li a0, 127
; LMULMAX1RV32-NEXT: vand.vx v8, v8, a0
; LMULMAX1RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; LMULMAX1RV32-NEXT: vzext.vf2 v9, v8
; LMULMAX1RV32-NEXT: vfwcvt.f.xu.v v8, v9
-; LMULMAX1RV32-NEXT: addi sp, sp, 16
; LMULMAX1RV32-NEXT: ret
;
; LMULMAX1RV64-LABEL: ui2fp_v3i7_v3f32:
; LMULMAX1RV64: # %bb.0:
-; LMULMAX1RV64-NEXT: addi sp, sp, -16
-; LMULMAX1RV64-NEXT: .cfi_def_cfa_offset 16
-; LMULMAX1RV64-NEXT: ld a1, 16(a0)
-; LMULMAX1RV64-NEXT: sb a1, 14(sp)
-; LMULMAX1RV64-NEXT: ld a1, 8(a0)
-; LMULMAX1RV64-NEXT: sb a1, 13(sp)
-; LMULMAX1RV64-NEXT: ld a0, 0(a0)
-; LMULMAX1RV64-NEXT: sb a0, 12(sp)
-; LMULMAX1RV64-NEXT: addi a0, sp, 12
+; LMULMAX1RV64-NEXT: ld a1, 0(a0)
; LMULMAX1RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV64-NEXT: vle8.v v8, (a0)
+; LMULMAX1RV64-NEXT: ld a2, 8(a0)
+; LMULMAX1RV64-NEXT: ld a0, 16(a0)
+; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a1
+; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a2
+; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0
+; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX1RV64-NEXT: li a0, 127
; LMULMAX1RV64-NEXT: vand.vx v8, v8, a0
; LMULMAX1RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; LMULMAX1RV64-NEXT: vzext.vf2 v9, v8
; LMULMAX1RV64-NEXT: vfwcvt.f.xu.v v8, v9
-; LMULMAX1RV64-NEXT: addi sp, sp, 16
; LMULMAX1RV64-NEXT: ret
%z = uitofp <3 x i7> %x to <3 x float>
ret <3 x float> %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
index 56af96e6c630..c2aa59000f66 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -21,8 +21,6 @@ define <5 x i8> @load_v5i8(ptr %p) {
define <5 x i8> @load_v5i8_align1(ptr %p) {
; RV32-LABEL: load_v5i8_align1:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: lbu a1, 1(a0)
; RV32-NEXT: lbu a2, 0(a0)
; RV32-NEXT: lbu a3, 2(a0)
@@ -35,29 +33,28 @@ define <5 x i8> @load_v5i8_align1(ptr %p) {
; RV32-NEXT: or a1, a3, a1
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vmv.s.x v8, a1
+; RV32-NEXT: vslidedown.vi v9, v8, 3
+; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vmv.x.s a2, v9
; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vslidedown.vi v11, v8, 3
+; RV32-NEXT: vmv.x.s a3, v9
+; RV32-NEXT: vmv.x.s a4, v8
; RV32-NEXT: lb a0, 4(a0)
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; RV32-NEXT: vse8.v v8, (a1)
-; RV32-NEXT: addi a2, sp, 11
-; RV32-NEXT: vse8.v v11, (a2)
-; RV32-NEXT: addi a2, sp, 10
-; RV32-NEXT: vse8.v v10, (a2)
-; RV32-NEXT: addi a2, sp, 9
-; RV32-NEXT: vse8.v v9, (a2)
-; RV32-NEXT: sb a0, 12(sp)
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vle8.v v8, (a1)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: ret
;
; RV64-LABEL: load_v5i8_align1:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: lbu a1, 1(a0)
; RV64-NEXT: lbu a2, 0(a0)
; RV64-NEXT: lbu a3, 2(a0)
@@ -70,23 +67,24 @@ define <5 x i8> @load_v5i8_align1(ptr %p) {
; RV64-NEXT: or a1, a3, a1
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v8, a1
+; RV64-NEXT: vslidedown.vi v9, v8, 3
+; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; RV64-NEXT: vmv.x.s a1, v9
+; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vmv.x.s a2, v9
; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v11, v8, 3
+; RV64-NEXT: vmv.x.s a3, v9
+; RV64-NEXT: vmv.x.s a4, v8
; RV64-NEXT: lb a0, 4(a0)
-; RV64-NEXT: addi a1, sp, 8
-; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; RV64-NEXT: vse8.v v8, (a1)
-; RV64-NEXT: addi a2, sp, 11
-; RV64-NEXT: vse8.v v11, (a2)
-; RV64-NEXT: addi a2, sp, 10
-; RV64-NEXT: vse8.v v10, (a2)
-; RV64-NEXT: addi a2, sp, 9
-; RV64-NEXT: vse8.v v9, (a2)
-; RV64-NEXT: sb a0, 12(sp)
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vle8.v v8, (a1)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: vslide1down.vx v8, v8, a4
+; RV64-NEXT: vslide1down.vx v8, v8, a3
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: ret
%x = load <5 x i8>, ptr %p, align 1
ret <5 x i8> %x
@@ -180,60 +178,54 @@ define <6 x double> @load_v6f64(ptr %p) {
define <6 x i1> @load_v6i1(ptr %p) {
; RV32-LABEL: load_v6i1:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: lbu a0, 0(a0)
-; RV32-NEXT: slli a1, a0, 30
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: slli a2, a0, 29
+; RV32-NEXT: srli a1, a0, 5
+; RV32-NEXT: slli a2, a0, 27
; RV32-NEXT: srli a2, a2, 31
; RV32-NEXT: slli a3, a0, 28
; RV32-NEXT: srli a3, a3, 31
-; RV32-NEXT: slli a4, a0, 27
+; RV32-NEXT: slli a4, a0, 29
; RV32-NEXT: srli a4, a4, 31
-; RV32-NEXT: andi a5, a0, 1
-; RV32-NEXT: srli a0, a0, 5
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: sb a5, 8(sp)
-; RV32-NEXT: sb a4, 12(sp)
-; RV32-NEXT: sb a3, 11(sp)
-; RV32-NEXT: sb a2, 10(sp)
-; RV32-NEXT: sb a1, 9(sp)
-; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: slli a5, a0, 30
+; RV32-NEXT: srli a5, a5, 31
+; RV32-NEXT: andi a0, a0, 1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vand.vi v8, v8, 1
; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: load_v6i1:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: lbu a0, 0(a0)
-; RV64-NEXT: slli a1, a0, 62
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: slli a2, a0, 61
+; RV64-NEXT: srli a1, a0, 5
+; RV64-NEXT: slli a2, a0, 59
; RV64-NEXT: srli a2, a2, 63
; RV64-NEXT: slli a3, a0, 60
; RV64-NEXT: srli a3, a3, 63
-; RV64-NEXT: slli a4, a0, 59
+; RV64-NEXT: slli a4, a0, 61
; RV64-NEXT: srli a4, a4, 63
-; RV64-NEXT: andi a5, a0, 1
-; RV64-NEXT: srli a0, a0, 5
-; RV64-NEXT: sb a0, 13(sp)
-; RV64-NEXT: sb a5, 8(sp)
-; RV64-NEXT: sb a4, 12(sp)
-; RV64-NEXT: sb a3, 11(sp)
-; RV64-NEXT: sb a2, 10(sp)
-; RV64-NEXT: sb a1, 9(sp)
-; RV64-NEXT: addi a0, sp, 8
+; RV64-NEXT: slli a5, a0, 62
+; RV64-NEXT: srli a5, a5, 63
+; RV64-NEXT: andi a0, a0, 1
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vle8.v v8, (a0)
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vslide1down.vx v8, v8, a5
+; RV64-NEXT: vslide1down.vx v8, v8, a4
+; RV64-NEXT: vslide1down.vx v8, v8, a3
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: vand.vi v8, v8, 1
; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%x = load <6 x i1>, ptr %p
ret <6 x i1> %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index 27e991852893..68fb15a58258 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -81,30 +81,20 @@ define <2 x i1> @buildvec_mask_nonconst_v2i1(i1 %x, i1 %y) {
define <2 x i1> @buildvec_mask_optsize_nonconst_v2i1(i1 %x, i1 %y) optsize {
; CHECK-LABEL: buildvec_mask_optsize_nonconst_v2i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: sb a1, 15(sp)
-; CHECK-NEXT: sb a0, 14(sp)
-; CHECK-NEXT: addi a0, sp, 14
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v2i1:
; ZVE32F: # %bb.0:
-; ZVE32F-NEXT: addi sp, sp, -16
-; ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; ZVE32F-NEXT: sb a1, 15(sp)
-; ZVE32F-NEXT: sb a0, 14(sp)
-; ZVE32F-NEXT: addi a0, sp, 14
; ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; ZVE32F-NEXT: vle8.v v8, (a0)
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
-; ZVE32F-NEXT: addi sp, sp, 16
; ZVE32F-NEXT: ret
%1 = insertelement <2 x i1> poison, i1 %x, i32 0
%2 = insertelement <2 x i1> %1, i1 %y, i32 1
@@ -195,34 +185,24 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) {
define <4 x i1> @buildvec_mask_optsize_nonconst_v4i1(i1 %x, i1 %y) optsize {
; CHECK-LABEL: buildvec_mask_optsize_nonconst_v4i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: sb a1, 15(sp)
-; CHECK-NEXT: sb a1, 14(sp)
-; CHECK-NEXT: sb a0, 13(sp)
-; CHECK-NEXT: sb a0, 12(sp)
-; CHECK-NEXT: addi a0, sp, 12
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v4i1:
; ZVE32F: # %bb.0:
-; ZVE32F-NEXT: addi sp, sp, -16
-; ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; ZVE32F-NEXT: sb a1, 15(sp)
-; ZVE32F-NEXT: sb a1, 14(sp)
-; ZVE32F-NEXT: sb a0, 13(sp)
-; ZVE32F-NEXT: sb a0, 12(sp)
-; ZVE32F-NEXT: addi a0, sp, 12
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32F-NEXT: vle8.v v8, (a0)
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
-; ZVE32F-NEXT: addi sp, sp, 16
; ZVE32F-NEXT: ret
%1 = insertelement <4 x i1> poison, i1 %x, i32 0
%2 = insertelement <4 x i1> %1, i1 %x, i32 1
@@ -234,36 +214,26 @@ define <4 x i1> @buildvec_mask_optsize_nonconst_v4i1(i1 %x, i1 %y) optsize {
define <4 x i1> @buildvec_mask_nonconst_v4i1_2(i1 %x, i1 %y) {
; CHECK-LABEL: buildvec_mask_nonconst_v4i1_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: sb a1, 15(sp)
-; CHECK-NEXT: li a1, 1
-; CHECK-NEXT: sb a1, 14(sp)
-; CHECK-NEXT: sb a0, 13(sp)
-; CHECK-NEXT: sb zero, 12(sp)
-; CHECK-NEXT: addi a0, sp, 12
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, zero
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1_2:
; ZVE32F: # %bb.0:
-; ZVE32F-NEXT: addi sp, sp, -16
-; ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; ZVE32F-NEXT: sb a1, 15(sp)
-; ZVE32F-NEXT: li a1, 1
-; ZVE32F-NEXT: sb a1, 14(sp)
-; ZVE32F-NEXT: sb a0, 13(sp)
-; ZVE32F-NEXT: sb zero, 12(sp)
-; ZVE32F-NEXT: addi a0, sp, 12
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32F-NEXT: vle8.v v8, (a0)
+; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: li a0, 1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
-; ZVE32F-NEXT: addi sp, sp, 16
; ZVE32F-NEXT: ret
%1 = insertelement <4 x i1> poison, i1 0, i32 0
%2 = insertelement <4 x i1> %1, i1 %x, i32 1
@@ -325,44 +295,34 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
; CHECK-LABEL: buildvec_mask_nonconst_v8i1_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: sb a2, 15(sp)
-; CHECK-NEXT: sb zero, 14(sp)
-; CHECK-NEXT: sb a3, 13(sp)
-; CHECK-NEXT: sb a0, 12(sp)
-; CHECK-NEXT: sb a1, 11(sp)
-; CHECK-NEXT: li a1, 1
-; CHECK-NEXT: sb a1, 10(sp)
-; CHECK-NEXT: sb a0, 9(sp)
-; CHECK-NEXT: sb a0, 8(sp)
-; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: li a4, 1
+; CHECK-NEXT: vslide1down.vx v8, v8, a4
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vslide1down.vx v8, v8, zero
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1_2:
; ZVE32F: # %bb.0:
-; ZVE32F-NEXT: addi sp, sp, -16
-; ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; ZVE32F-NEXT: sb a2, 15(sp)
-; ZVE32F-NEXT: sb zero, 14(sp)
-; ZVE32F-NEXT: sb a3, 13(sp)
-; ZVE32F-NEXT: sb a0, 12(sp)
-; ZVE32F-NEXT: sb a1, 11(sp)
-; ZVE32F-NEXT: li a1, 1
-; ZVE32F-NEXT: sb a1, 10(sp)
-; ZVE32F-NEXT: sb a0, 9(sp)
-; ZVE32F-NEXT: sb a0, 8(sp)
-; ZVE32F-NEXT: addi a0, sp, 8
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; ZVE32F-NEXT: vle8.v v8, (a0)
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: li a4, 1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
+; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
-; ZVE32F-NEXT: addi sp, sp, 16
; ZVE32F-NEXT: ret
%1 = insertelement <8 x i1> poison, i1 %x, i32 0
%2 = insertelement <8 x i1> %1, i1 %x, i32 1
@@ -378,44 +338,34 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) optsize {
; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: sb a2, 15(sp)
-; CHECK-NEXT: sb zero, 14(sp)
-; CHECK-NEXT: sb a3, 13(sp)
-; CHECK-NEXT: sb a0, 12(sp)
-; CHECK-NEXT: sb a1, 11(sp)
-; CHECK-NEXT: li a1, 1
-; CHECK-NEXT: sb a1, 10(sp)
-; CHECK-NEXT: sb a0, 9(sp)
-; CHECK-NEXT: sb a0, 8(sp)
-; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: li a4, 1
+; CHECK-NEXT: vslide1down.vx v8, v8, a4
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vslide1down.vx v8, v8, zero
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1_2:
; ZVE32F: # %bb.0:
-; ZVE32F-NEXT: addi sp, sp, -16
-; ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; ZVE32F-NEXT: sb a2, 15(sp)
-; ZVE32F-NEXT: sb zero, 14(sp)
-; ZVE32F-NEXT: sb a3, 13(sp)
-; ZVE32F-NEXT: sb a0, 12(sp)
-; ZVE32F-NEXT: sb a1, 11(sp)
-; ZVE32F-NEXT: li a1, 1
-; ZVE32F-NEXT: sb a1, 10(sp)
-; ZVE32F-NEXT: sb a0, 9(sp)
-; ZVE32F-NEXT: sb a0, 8(sp)
-; ZVE32F-NEXT: addi a0, sp, 8
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; ZVE32F-NEXT: vle8.v v8, (a0)
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: li a4, 1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
+; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
-; ZVE32F-NEXT: addi sp, sp, 16
; ZVE32F-NEXT: ret
%1 = insertelement <8 x i1> poison, i1 %x, i32 0
%2 = insertelement <8 x i1> %1, i1 %x, i32 1
@@ -431,42 +381,32 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: sb a1, 15(sp)
-; CHECK-NEXT: sb a1, 14(sp)
-; CHECK-NEXT: sb a1, 13(sp)
-; CHECK-NEXT: sb a0, 12(sp)
-; CHECK-NEXT: sb a1, 11(sp)
-; CHECK-NEXT: sb a1, 10(sp)
-; CHECK-NEXT: sb a0, 9(sp)
-; CHECK-NEXT: sb a0, 8(sp)
-; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1:
; ZVE32F: # %bb.0:
-; ZVE32F-NEXT: addi sp, sp, -16
-; ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; ZVE32F-NEXT: sb a1, 15(sp)
-; ZVE32F-NEXT: sb a1, 14(sp)
-; ZVE32F-NEXT: sb a1, 13(sp)
-; ZVE32F-NEXT: sb a0, 12(sp)
-; ZVE32F-NEXT: sb a1, 11(sp)
-; ZVE32F-NEXT: sb a1, 10(sp)
-; ZVE32F-NEXT: sb a0, 9(sp)
-; ZVE32F-NEXT: sb a0, 8(sp)
-; ZVE32F-NEXT: addi a0, sp, 8
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; ZVE32F-NEXT: vle8.v v8, (a0)
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
-; ZVE32F-NEXT: addi sp, sp, 16
; ZVE32F-NEXT: ret
%1 = insertelement <8 x i1> poison, i1 %x, i32 0
%2 = insertelement <8 x i1> %1, i1 %x, i32 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 1b26fba4df76..a30afd02bdd5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -6775,191 +6775,156 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: addi sp, sp, -96
-; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 96
-; RV32ZVE32F-NEXT: sw ra, 92(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s0, 88(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s2, 84(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s3, 80(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s4, 76(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s5, 72(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s6, 68(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s7, 64(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s8, 60(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s9, 56(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s10, 52(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s11, 48(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: .cfi_offset ra, -4
-; RV32ZVE32F-NEXT: .cfi_offset s0, -8
-; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: .cfi_offset s3, -16
-; RV32ZVE32F-NEXT: .cfi_offset s4, -20
-; RV32ZVE32F-NEXT: .cfi_offset s5, -24
-; RV32ZVE32F-NEXT: .cfi_offset s6, -28
-; RV32ZVE32F-NEXT: .cfi_offset s7, -32
-; RV32ZVE32F-NEXT: .cfi_offset s8, -36
-; RV32ZVE32F-NEXT: .cfi_offset s9, -40
-; RV32ZVE32F-NEXT: .cfi_offset s10, -44
-; RV32ZVE32F-NEXT: .cfi_offset s11, -48
-; RV32ZVE32F-NEXT: addi s0, sp, 96
-; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0
-; RV32ZVE32F-NEXT: andi sp, sp, -32
-; RV32ZVE32F-NEXT: lw a4, 60(a3)
-; RV32ZVE32F-NEXT: lw a5, 56(a3)
-; RV32ZVE32F-NEXT: lw a6, 52(a3)
-; RV32ZVE32F-NEXT: lw a7, 48(a3)
-; RV32ZVE32F-NEXT: lw t0, 44(a3)
-; RV32ZVE32F-NEXT: lw t1, 40(a3)
-; RV32ZVE32F-NEXT: lw t2, 36(a3)
-; RV32ZVE32F-NEXT: lw t3, 32(a3)
-; RV32ZVE32F-NEXT: lw t4, 28(a3)
-; RV32ZVE32F-NEXT: lw t5, 24(a3)
-; RV32ZVE32F-NEXT: lw t6, 20(a3)
-; RV32ZVE32F-NEXT: lw s2, 16(a3)
-; RV32ZVE32F-NEXT: lw s3, 12(a3)
-; RV32ZVE32F-NEXT: lw s5, 8(a3)
-; RV32ZVE32F-NEXT: lw s4, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
-; RV32ZVE32F-NEXT: lw s6, 0(a2)
-; RV32ZVE32F-NEXT: lw s7, 8(a2)
-; RV32ZVE32F-NEXT: lw s8, 16(a2)
-; RV32ZVE32F-NEXT: lw s9, 24(a2)
-; RV32ZVE32F-NEXT: lw s10, 56(a2)
-; RV32ZVE32F-NEXT: lw s11, 48(a2)
-; RV32ZVE32F-NEXT: lw ra, 40(a2)
-; RV32ZVE32F-NEXT: lw a2, 32(a2)
-; RV32ZVE32F-NEXT: sw s10, 28(sp)
-; RV32ZVE32F-NEXT: sw s11, 24(sp)
-; RV32ZVE32F-NEXT: sw ra, 20(sp)
-; RV32ZVE32F-NEXT: sw a2, 16(sp)
-; RV32ZVE32F-NEXT: sw s9, 12(sp)
-; RV32ZVE32F-NEXT: sw s8, 8(sp)
-; RV32ZVE32F-NEXT: sw s7, 4(sp)
-; RV32ZVE32F-NEXT: sw s6, 0(sp)
-; RV32ZVE32F-NEXT: mv a2, sp
+; RV32ZVE32F-NEXT: addi sp, sp, -16
+; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
+; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: .cfi_offset s0, -4
+; RV32ZVE32F-NEXT: .cfi_offset s1, -8
+; RV32ZVE32F-NEXT: lw a4, 56(a2)
+; RV32ZVE32F-NEXT: lw a5, 48(a2)
+; RV32ZVE32F-NEXT: lw a6, 40(a2)
+; RV32ZVE32F-NEXT: lw a7, 32(a2)
+; RV32ZVE32F-NEXT: lw t0, 24(a2)
+; RV32ZVE32F-NEXT: lw t1, 0(a2)
+; RV32ZVE32F-NEXT: lw t2, 8(a2)
+; RV32ZVE32F-NEXT: lw a2, 16(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vle32.v v8, (a2)
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a1, v0
-; RV32ZVE32F-NEXT: andi a2, a1, 1
-; RV32ZVE32F-NEXT: bnez a2, .LBB57_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
-; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: bnez a2, .LBB57_11
-; RV32ZVE32F-NEXT: .LBB57_2: # %else2
-; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: bnez a2, .LBB57_12
-; RV32ZVE32F-NEXT: .LBB57_3: # %else5
-; RV32ZVE32F-NEXT: andi a2, a1, 8
-; RV32ZVE32F-NEXT: bnez a2, .LBB57_13
-; RV32ZVE32F-NEXT: .LBB57_4: # %else8
-; RV32ZVE32F-NEXT: andi a2, a1, 16
-; RV32ZVE32F-NEXT: bnez a2, .LBB57_14
-; RV32ZVE32F-NEXT: .LBB57_5: # %else11
-; RV32ZVE32F-NEXT: andi a2, a1, 32
-; RV32ZVE32F-NEXT: bnez a2, .LBB57_15
-; RV32ZVE32F-NEXT: .LBB57_6: # %else14
-; RV32ZVE32F-NEXT: andi a2, a1, 64
-; RV32ZVE32F-NEXT: bnez a2, .LBB57_16
-; RV32ZVE32F-NEXT: .LBB57_7: # %else17
-; RV32ZVE32F-NEXT: andi a1, a1, -128
+; RV32ZVE32F-NEXT: vmv.x.s t0, v0
+; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB57_9
-; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load19
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
-; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: lw a4, 4(a1)
-; RV32ZVE32F-NEXT: lw a5, 0(a1)
-; RV32ZVE32F-NEXT: .LBB57_9: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw s4, 4(a0)
-; RV32ZVE32F-NEXT: sw s5, 8(a0)
-; RV32ZVE32F-NEXT: sw s3, 12(a0)
-; RV32ZVE32F-NEXT: sw s2, 16(a0)
-; RV32ZVE32F-NEXT: sw t6, 20(a0)
-; RV32ZVE32F-NEXT: sw t5, 24(a0)
-; RV32ZVE32F-NEXT: sw t4, 28(a0)
-; RV32ZVE32F-NEXT: sw t3, 32(a0)
-; RV32ZVE32F-NEXT: sw t2, 36(a0)
-; RV32ZVE32F-NEXT: sw t1, 40(a0)
-; RV32ZVE32F-NEXT: sw t0, 44(a0)
-; RV32ZVE32F-NEXT: sw a7, 48(a0)
-; RV32ZVE32F-NEXT: sw a6, 52(a0)
-; RV32ZVE32F-NEXT: sw a5, 56(a0)
-; RV32ZVE32F-NEXT: sw a4, 60(a0)
-; RV32ZVE32F-NEXT: addi sp, s0, -96
-; RV32ZVE32F-NEXT: lw ra, 92(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s0, 88(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s2, 84(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s3, 80(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s4, 76(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s5, 72(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s6, 68(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s7, 64(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s8, 60(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s9, 56(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s10, 52(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s11, 48(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: addi sp, sp, 96
-; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load
+; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw s4, 4(a2)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: beqz a2, .LBB57_2
-; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load1
+; RV32ZVE32F-NEXT: lw a1, 4(a2)
+; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: bnez a4, .LBB57_10
+; RV32ZVE32F-NEXT: .LBB57_2:
+; RV32ZVE32F-NEXT: lw a4, 12(a3)
+; RV32ZVE32F-NEXT: lw a5, 8(a3)
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: bnez a6, .LBB57_11
+; RV32ZVE32F-NEXT: .LBB57_3:
+; RV32ZVE32F-NEXT: lw a6, 20(a3)
+; RV32ZVE32F-NEXT: lw a7, 16(a3)
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: bnez t1, .LBB57_12
+; RV32ZVE32F-NEXT: .LBB57_4:
+; RV32ZVE32F-NEXT: lw t1, 28(a3)
+; RV32ZVE32F-NEXT: lw t2, 24(a3)
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: bnez t3, .LBB57_13
+; RV32ZVE32F-NEXT: .LBB57_5:
+; RV32ZVE32F-NEXT: lw t3, 36(a3)
+; RV32ZVE32F-NEXT: lw t4, 32(a3)
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: bnez t5, .LBB57_14
+; RV32ZVE32F-NEXT: .LBB57_6:
+; RV32ZVE32F-NEXT: lw t5, 44(a3)
+; RV32ZVE32F-NEXT: lw t6, 40(a3)
+; RV32ZVE32F-NEXT: andi s0, t0, 64
+; RV32ZVE32F-NEXT: bnez s0, .LBB57_15
+; RV32ZVE32F-NEXT: .LBB57_7:
+; RV32ZVE32F-NEXT: lw s0, 52(a3)
+; RV32ZVE32F-NEXT: lw s1, 48(a3)
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: bnez t0, .LBB57_16
+; RV32ZVE32F-NEXT: .LBB57_8:
+; RV32ZVE32F-NEXT: lw t0, 60(a3)
+; RV32ZVE32F-NEXT: lw a3, 56(a3)
+; RV32ZVE32F-NEXT: j .LBB57_17
+; RV32ZVE32F-NEXT: .LBB57_9:
+; RV32ZVE32F-NEXT: lw a1, 4(a3)
+; RV32ZVE32F-NEXT: lw a2, 0(a3)
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB57_2
+; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
-; RV32ZVE32F-NEXT: vmv.x.s a2, v10
-; RV32ZVE32F-NEXT: lw s3, 4(a2)
-; RV32ZVE32F-NEXT: lw s5, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: beqz a2, .LBB57_3
-; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load4
+; RV32ZVE32F-NEXT: vmv.x.s a5, v10
+; RV32ZVE32F-NEXT: lw a4, 4(a5)
+; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB57_3
+; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
-; RV32ZVE32F-NEXT: vmv.x.s a2, v10
-; RV32ZVE32F-NEXT: lw t6, 4(a2)
-; RV32ZVE32F-NEXT: lw s2, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 8
-; RV32ZVE32F-NEXT: beqz a2, .LBB57_4
-; RV32ZVE32F-NEXT: .LBB57_13: # %cond.load7
+; RV32ZVE32F-NEXT: vmv.x.s a7, v10
+; RV32ZVE32F-NEXT: lw a6, 4(a7)
+; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB57_4
+; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV32ZVE32F-NEXT: vmv.x.s a2, v10
-; RV32ZVE32F-NEXT: lw t4, 4(a2)
-; RV32ZVE32F-NEXT: lw t5, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 16
-; RV32ZVE32F-NEXT: beqz a2, .LBB57_5
-; RV32ZVE32F-NEXT: .LBB57_14: # %cond.load10
+; RV32ZVE32F-NEXT: vmv.x.s t2, v10
+; RV32ZVE32F-NEXT: lw t1, 4(t2)
+; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB57_5
+; RV32ZVE32F-NEXT: .LBB57_13: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
-; RV32ZVE32F-NEXT: vmv.x.s a2, v10
-; RV32ZVE32F-NEXT: lw t2, 4(a2)
-; RV32ZVE32F-NEXT: lw t3, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 32
-; RV32ZVE32F-NEXT: beqz a2, .LBB57_6
-; RV32ZVE32F-NEXT: .LBB57_15: # %cond.load13
+; RV32ZVE32F-NEXT: vmv.x.s t4, v10
+; RV32ZVE32F-NEXT: lw t3, 4(t4)
+; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB57_6
+; RV32ZVE32F-NEXT: .LBB57_14: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
-; RV32ZVE32F-NEXT: vmv.x.s a2, v10
-; RV32ZVE32F-NEXT: lw t0, 4(a2)
-; RV32ZVE32F-NEXT: lw t1, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 64
-; RV32ZVE32F-NEXT: beqz a2, .LBB57_7
-; RV32ZVE32F-NEXT: .LBB57_16: # %cond.load16
+; RV32ZVE32F-NEXT: vmv.x.s t6, v10
+; RV32ZVE32F-NEXT: lw t5, 4(t6)
+; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: andi s0, t0, 64
+; RV32ZVE32F-NEXT: beqz s0, .LBB57_7
+; RV32ZVE32F-NEXT: .LBB57_15: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
-; RV32ZVE32F-NEXT: vmv.x.s a2, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a2)
-; RV32ZVE32F-NEXT: lw a7, 0(a2)
-; RV32ZVE32F-NEXT: andi a1, a1, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB57_8
-; RV32ZVE32F-NEXT: j .LBB57_9
+; RV32ZVE32F-NEXT: vmv.x.s s1, v10
+; RV32ZVE32F-NEXT: lw s0, 4(s1)
+; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB57_8
+; RV32ZVE32F-NEXT: .LBB57_16: # %cond.load19
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
+; RV32ZVE32F-NEXT: vmv.x.s a3, v8
+; RV32ZVE32F-NEXT: lw t0, 4(a3)
+; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: .LBB57_17: # %else20
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a1, 4(a0)
+; RV32ZVE32F-NEXT: sw a5, 8(a0)
+; RV32ZVE32F-NEXT: sw a4, 12(a0)
+; RV32ZVE32F-NEXT: sw a7, 16(a0)
+; RV32ZVE32F-NEXT: sw a6, 20(a0)
+; RV32ZVE32F-NEXT: sw t2, 24(a0)
+; RV32ZVE32F-NEXT: sw t1, 28(a0)
+; RV32ZVE32F-NEXT: sw t4, 32(a0)
+; RV32ZVE32F-NEXT: sw t3, 36(a0)
+; RV32ZVE32F-NEXT: sw t6, 40(a0)
+; RV32ZVE32F-NEXT: sw t5, 44(a0)
+; RV32ZVE32F-NEXT: sw s1, 48(a0)
+; RV32ZVE32F-NEXT: sw s0, 52(a0)
+; RV32ZVE32F-NEXT: sw a3, 56(a0)
+; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: addi sp, sp, 16
+; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -11974,34 +11939,23 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8f64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: addi sp, sp, -64
-; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 64
-; RV32ZVE32F-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: .cfi_offset ra, -4
-; RV32ZVE32F-NEXT: .cfi_offset s0, -8
-; RV32ZVE32F-NEXT: addi s0, sp, 64
-; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0
-; RV32ZVE32F-NEXT: andi sp, sp, -32
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
-; RV32ZVE32F-NEXT: lw a4, 8(a2)
-; RV32ZVE32F-NEXT: lw a5, 16(a2)
-; RV32ZVE32F-NEXT: lw a6, 24(a2)
-; RV32ZVE32F-NEXT: lw a7, 56(a2)
-; RV32ZVE32F-NEXT: lw t0, 48(a2)
-; RV32ZVE32F-NEXT: lw t1, 40(a2)
-; RV32ZVE32F-NEXT: lw a2, 32(a2)
-; RV32ZVE32F-NEXT: sw a7, 28(sp)
-; RV32ZVE32F-NEXT: sw t0, 24(sp)
-; RV32ZVE32F-NEXT: sw t1, 20(sp)
-; RV32ZVE32F-NEXT: sw a2, 16(sp)
-; RV32ZVE32F-NEXT: sw a6, 12(sp)
-; RV32ZVE32F-NEXT: sw a5, 8(sp)
-; RV32ZVE32F-NEXT: sw a4, 4(sp)
-; RV32ZVE32F-NEXT: sw a3, 0(sp)
-; RV32ZVE32F-NEXT: mv a2, sp
+; RV32ZVE32F-NEXT: lw a3, 56(a2)
+; RV32ZVE32F-NEXT: lw a4, 48(a2)
+; RV32ZVE32F-NEXT: lw a5, 40(a2)
+; RV32ZVE32F-NEXT: lw a6, 32(a2)
+; RV32ZVE32F-NEXT: lw a7, 24(a2)
+; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw t1, 8(a2)
+; RV32ZVE32F-NEXT: lw a2, 16(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vle32.v v8, (a2)
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
@@ -12043,10 +11997,6 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
-; RV32ZVE32F-NEXT: addi sp, s0, -64
-; RV32ZVE32F-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: addi sp, sp, 64
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load
; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 31bf057d9e4e..b00f8082b385 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -5684,22 +5684,19 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: addi sp, sp, -96
-; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 96
-; RV32ZVE32F-NEXT: sw ra, 92(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s0, 88(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s2, 84(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s3, 80(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s4, 76(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s5, 72(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s6, 68(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s7, 64(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s8, 60(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s9, 56(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s10, 52(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s11, 48(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: .cfi_offset ra, -4
-; RV32ZVE32F-NEXT: .cfi_offset s0, -8
+; RV32ZVE32F-NEXT: addi sp, sp, -48
+; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 48
+; RV32ZVE32F-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: .cfi_offset s0, -4
+; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_offset s3, -16
; RV32ZVE32F-NEXT: .cfi_offset s4, -20
@@ -5707,12 +5704,6 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: .cfi_offset s6, -28
; RV32ZVE32F-NEXT: .cfi_offset s7, -32
; RV32ZVE32F-NEXT: .cfi_offset s8, -36
-; RV32ZVE32F-NEXT: .cfi_offset s9, -40
-; RV32ZVE32F-NEXT: .cfi_offset s10, -44
-; RV32ZVE32F-NEXT: .cfi_offset s11, -48
-; RV32ZVE32F-NEXT: addi s0, sp, 96
-; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0
-; RV32ZVE32F-NEXT: andi sp, sp, -32
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a4, 56(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
@@ -5725,55 +5716,51 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: lw t4, 24(a0)
; RV32ZVE32F-NEXT: lw t5, 20(a0)
; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s3, 12(a0)
-; RV32ZVE32F-NEXT: lw s2, 8(a0)
-; RV32ZVE32F-NEXT: lw s5, 4(a0)
-; RV32ZVE32F-NEXT: lw s4, 0(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a2)
-; RV32ZVE32F-NEXT: lw s6, 8(a2)
-; RV32ZVE32F-NEXT: lw s7, 16(a2)
-; RV32ZVE32F-NEXT: lw s8, 24(a2)
-; RV32ZVE32F-NEXT: lw s9, 56(a2)
-; RV32ZVE32F-NEXT: lw s10, 48(a2)
-; RV32ZVE32F-NEXT: lw s11, 40(a2)
-; RV32ZVE32F-NEXT: lw a2, 32(a2)
-; RV32ZVE32F-NEXT: sw s9, 28(sp)
-; RV32ZVE32F-NEXT: sw s10, 24(sp)
-; RV32ZVE32F-NEXT: sw s11, 20(sp)
-; RV32ZVE32F-NEXT: sw a2, 16(sp)
-; RV32ZVE32F-NEXT: sw s8, 12(sp)
-; RV32ZVE32F-NEXT: sw s7, 8(sp)
-; RV32ZVE32F-NEXT: sw s6, 4(sp)
-; RV32ZVE32F-NEXT: sw a0, 0(sp)
-; RV32ZVE32F-NEXT: mv a0, sp
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw s0, 8(a0)
+; RV32ZVE32F-NEXT: lw s2, 56(a2)
+; RV32ZVE32F-NEXT: lw s3, 48(a2)
+; RV32ZVE32F-NEXT: lw s4, 40(a2)
+; RV32ZVE32F-NEXT: lw s5, 32(a2)
+; RV32ZVE32F-NEXT: lw s6, 24(a2)
+; RV32ZVE32F-NEXT: lw s7, 0(a2)
+; RV32ZVE32F-NEXT: lw s8, 8(a2)
+; RV32ZVE32F-NEXT: lw a2, 16(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vle32.v v8, (a0)
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s6
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a0, v0
-; RV32ZVE32F-NEXT: andi a1, a0, 1
-; RV32ZVE32F-NEXT: bnez a1, .LBB51_10
+; RV32ZVE32F-NEXT: vmv.x.s a1, v0
+; RV32ZVE32F-NEXT: andi a2, a1, 1
+; RV32ZVE32F-NEXT: bnez a2, .LBB51_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
-; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB51_11
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: bnez a0, .LBB51_11
; RV32ZVE32F-NEXT: .LBB51_2: # %else2
-; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB51_12
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: bnez a0, .LBB51_12
; RV32ZVE32F-NEXT: .LBB51_3: # %else4
-; RV32ZVE32F-NEXT: andi a1, a0, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB51_13
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: bnez a0, .LBB51_13
; RV32ZVE32F-NEXT: .LBB51_4: # %else6
-; RV32ZVE32F-NEXT: andi a1, a0, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB51_14
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: bnez a0, .LBB51_14
; RV32ZVE32F-NEXT: .LBB51_5: # %else8
-; RV32ZVE32F-NEXT: andi a1, a0, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB51_15
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: bnez a0, .LBB51_15
; RV32ZVE32F-NEXT: .LBB51_6: # %else10
-; RV32ZVE32F-NEXT: andi a1, a0, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB51_16
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: bnez a0, .LBB51_16
; RV32ZVE32F-NEXT: .LBB51_7: # %else12
-; RV32ZVE32F-NEXT: andi a0, a0, -128
+; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB51_9
; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
@@ -5782,75 +5769,73 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB51_9: # %else14
-; RV32ZVE32F-NEXT: addi sp, s0, -96
-; RV32ZVE32F-NEXT: lw ra, 92(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s0, 88(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s2, 84(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s3, 80(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s4, 76(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s5, 72(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s6, 68(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s7, 64(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s8, 60(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s9, 56(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s10, 52(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s11, 48(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: addi sp, sp, 96
+; RV32ZVE32F-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: addi sp, sp, 48
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store
+; RV32ZVE32F-NEXT: lw a2, 4(a0)
+; RV32ZVE32F-NEXT: lw a0, 0(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: sw s5, 4(a1)
-; RV32ZVE32F-NEXT: sw s4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB51_2
+; RV32ZVE32F-NEXT: vmv.x.s s2, v8
+; RV32ZVE32F-NEXT: sw a2, 4(s2)
+; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_2
; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
-; RV32ZVE32F-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-NEXT: sw s3, 4(a1)
-; RV32ZVE32F-NEXT: sw s2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB51_3
+; RV32ZVE32F-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
+; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_3
; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
-; RV32ZVE32F-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a1)
-; RV32ZVE32F-NEXT: sw t5, 4(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB51_4
+; RV32ZVE32F-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-NEXT: sw t6, 0(a0)
+; RV32ZVE32F-NEXT: sw t5, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_4
; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV32ZVE32F-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a1)
-; RV32ZVE32F-NEXT: sw t3, 4(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB51_5
+; RV32ZVE32F-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-NEXT: sw t4, 0(a0)
+; RV32ZVE32F-NEXT: sw t3, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_5
; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
-; RV32ZVE32F-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a1)
-; RV32ZVE32F-NEXT: sw t1, 4(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB51_6
+; RV32ZVE32F-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-NEXT: sw t2, 0(a0)
+; RV32ZVE32F-NEXT: sw t1, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_6
; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
-; RV32ZVE32F-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-NEXT: sw t0, 0(a1)
-; RV32ZVE32F-NEXT: sw a7, 4(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB51_7
+; RV32ZVE32F-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-NEXT: sw t0, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_7
; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
-; RV32ZVE32F-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-NEXT: sw a6, 0(a1)
-; RV32ZVE32F-NEXT: sw a5, 4(a1)
-; RV32ZVE32F-NEXT: andi a0, a0, -128
+; RV32ZVE32F-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a5, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB51_8
; RV32ZVE32F-NEXT: j .LBB51_9
;
@@ -10381,121 +10366,106 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: addi sp, sp, -64
-; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 64
-; RV32ZVE32F-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: .cfi_offset ra, -4
-; RV32ZVE32F-NEXT: .cfi_offset s0, -8
-; RV32ZVE32F-NEXT: addi s0, sp, 64
-; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0
-; RV32ZVE32F-NEXT: andi sp, sp, -32
-; RV32ZVE32F-NEXT: lw a2, 0(a1)
-; RV32ZVE32F-NEXT: lw a3, 8(a1)
-; RV32ZVE32F-NEXT: lw a4, 16(a1)
-; RV32ZVE32F-NEXT: lw a5, 24(a1)
-; RV32ZVE32F-NEXT: lw a6, 56(a1)
-; RV32ZVE32F-NEXT: lw a7, 48(a1)
-; RV32ZVE32F-NEXT: lw t0, 40(a1)
-; RV32ZVE32F-NEXT: lw a1, 32(a1)
-; RV32ZVE32F-NEXT: sw a6, 28(sp)
-; RV32ZVE32F-NEXT: sw a7, 24(sp)
-; RV32ZVE32F-NEXT: sw t0, 20(sp)
-; RV32ZVE32F-NEXT: sw a1, 16(sp)
-; RV32ZVE32F-NEXT: sw a5, 12(sp)
-; RV32ZVE32F-NEXT: sw a4, 8(sp)
-; RV32ZVE32F-NEXT: sw a3, 4(sp)
-; RV32ZVE32F-NEXT: sw a2, 0(sp)
-; RV32ZVE32F-NEXT: mv a1, sp
+; RV32ZVE32F-NEXT: lw a2, 56(a1)
+; RV32ZVE32F-NEXT: lw a3, 48(a1)
+; RV32ZVE32F-NEXT: lw a4, 40(a1)
+; RV32ZVE32F-NEXT: lw a5, 32(a1)
+; RV32ZVE32F-NEXT: lw a6, 24(a1)
+; RV32ZVE32F-NEXT: lw a7, 0(a1)
+; RV32ZVE32F-NEXT: lw t0, 8(a1)
+; RV32ZVE32F-NEXT: lw a1, 16(a1)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vle32.v v8, (a1)
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_10
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_11
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_10
; RV32ZVE32F-NEXT: .LBB90_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_12
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_11
; RV32ZVE32F-NEXT: .LBB90_3: # %else4
; RV32ZVE32F-NEXT: andi a1, a0, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_13
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_12
; RV32ZVE32F-NEXT: .LBB90_4: # %else6
; RV32ZVE32F-NEXT: andi a1, a0, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_14
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_13
; RV32ZVE32F-NEXT: .LBB90_5: # %else8
; RV32ZVE32F-NEXT: andi a1, a0, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_15
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_14
; RV32ZVE32F-NEXT: .LBB90_6: # %else10
; RV32ZVE32F-NEXT: andi a1, a0, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_16
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_15
; RV32ZVE32F-NEXT: .LBB90_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB90_9
-; RV32ZVE32F-NEXT: .LBB90_8: # %cond.store13
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
-; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
-; RV32ZVE32F-NEXT: .LBB90_9: # %else14
-; RV32ZVE32F-NEXT: addi sp, s0, -64
-; RV32ZVE32F-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: addi sp, sp, 64
+; RV32ZVE32F-NEXT: bnez a0, .LBB90_16
+; RV32ZVE32F-NEXT: .LBB90_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store
+; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store
; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: beqz a1, .LBB90_2
-; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store1
+; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 4
; RV32ZVE32F-NEXT: beqz a1, .LBB90_3
-; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store3
+; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 8
; RV32ZVE32F-NEXT: beqz a1, .LBB90_4
-; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store5
+; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 16
; RV32ZVE32F-NEXT: beqz a1, .LBB90_5
-; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store7
+; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 32
; RV32ZVE32F-NEXT: beqz a1, .LBB90_6
-; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store9
+; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 64
; RV32ZVE32F-NEXT: beqz a1, .LBB90_7
-; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store11
+; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a0, a0, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB90_8
-; RV32ZVE32F-NEXT: j .LBB90_9
+; RV32ZVE32F-NEXT: beqz a0, .LBB90_8
+; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
+; RV32ZVE32F-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
+; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8f64:
; RV64ZVE32F: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index 543f35d3ad63..81a3d7141daa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -103,26 +103,23 @@ define void @store_v6i16(ptr %p, <6 x i16> %v) {
define void @store_v6f16(ptr %p, <6 x half> %v) {
; RV32-LABEL: store_v6f16:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: lh a2, 4(a1)
-; RV32-NEXT: lhu a3, 0(a1)
+; RV32-NEXT: lh a2, 20(a1)
+; RV32-NEXT: lhu a3, 16(a1)
; RV32-NEXT: slli a2, a2, 16
; RV32-NEXT: or a2, a3, a2
; RV32-NEXT: lh a3, 12(a1)
; RV32-NEXT: lhu a4, 8(a1)
-; RV32-NEXT: lh a5, 20(a1)
-; RV32-NEXT: lhu a1, 16(a1)
+; RV32-NEXT: lh a5, 4(a1)
+; RV32-NEXT: lhu a1, 0(a1)
; RV32-NEXT: slli a3, a3, 16
; RV32-NEXT: or a3, a4, a3
; RV32-NEXT: slli a5, a5, 16
; RV32-NEXT: or a1, a1, a5
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vle32.v v8, (a1)
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: vslidedown.vi v9, v8, 2
@@ -131,7 +128,6 @@ define void @store_v6f16(ptr %p, <6 x half> %v) {
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: store_v6f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index 1b3216e15c9c..acb8477fa864 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -5,29 +5,27 @@
define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vv_v6i32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: lbu a2, 0(a2)
; RV32-NEXT: vle32.v v8, (a1)
-; RV32-NEXT: srli a1, a2, 5
-; RV32-NEXT: sb a1, 13(sp)
; RV32-NEXT: andi a1, a2, 1
-; RV32-NEXT: sb a1, 8(sp)
-; RV32-NEXT: slli a1, a2, 27
+; RV32-NEXT: vslide1down.vx v10, v8, a1
+; RV32-NEXT: slli a1, a2, 30
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 29
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 12(sp)
+; RV32-NEXT: vslide1down.vx v10, v10, a1
; RV32-NEXT: slli a1, a2, 28
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 11(sp)
-; RV32-NEXT: slli a1, a2, 29
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 27
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 10(sp)
-; RV32-NEXT: slli a2, a2, 30
-; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 9(sp)
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vle8.v v10, (a1)
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: srli a2, a2, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a2
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
@@ -39,34 +37,31 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vse32.v v10, (a0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vse32.v v8, (a3)
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vv_v6i32:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: lbu a2, 0(a2)
; RV64-NEXT: vle32.v v8, (a1)
-; RV64-NEXT: srli a1, a2, 5
-; RV64-NEXT: sb a1, 13(sp)
; RV64-NEXT: andi a1, a2, 1
-; RV64-NEXT: sb a1, 8(sp)
-; RV64-NEXT: slli a1, a2, 59
+; RV64-NEXT: vslide1down.vx v10, v8, a1
+; RV64-NEXT: slli a1, a2, 62
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 61
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 12(sp)
+; RV64-NEXT: vslide1down.vx v10, v10, a1
; RV64-NEXT: slli a1, a2, 60
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 11(sp)
-; RV64-NEXT: slli a1, a2, 61
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 59
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 10(sp)
-; RV64-NEXT: slli a2, a2, 62
-; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 9(sp)
-; RV64-NEXT: addi a1, sp, 8
-; RV64-NEXT: vle8.v v10, (a1)
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: srli a2, a2, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a2
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
@@ -77,7 +72,6 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a3)
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%va = load <6 x i32>, ptr %a
%vb = load <6 x i32>, ptr %b
@@ -90,29 +84,27 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vx_v6i32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: lbu a2, 0(a2)
; RV32-NEXT: vle32.v v8, (a1)
-; RV32-NEXT: srli a1, a2, 5
-; RV32-NEXT: sb a1, 13(sp)
; RV32-NEXT: andi a1, a2, 1
-; RV32-NEXT: sb a1, 8(sp)
-; RV32-NEXT: slli a1, a2, 27
+; RV32-NEXT: vslide1down.vx v10, v8, a1
+; RV32-NEXT: slli a1, a2, 30
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 29
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 12(sp)
+; RV32-NEXT: vslide1down.vx v10, v10, a1
; RV32-NEXT: slli a1, a2, 28
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 11(sp)
-; RV32-NEXT: slli a1, a2, 29
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 27
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 10(sp)
-; RV32-NEXT: slli a2, a2, 30
-; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 9(sp)
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vle8.v v10, (a1)
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: srli a2, a2, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a2
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -124,34 +116,31 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vse32.v v10, (a0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vse32.v v8, (a3)
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vx_v6i32:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: lbu a2, 0(a2)
; RV64-NEXT: vle32.v v8, (a1)
-; RV64-NEXT: srli a1, a2, 5
-; RV64-NEXT: sb a1, 13(sp)
; RV64-NEXT: andi a1, a2, 1
-; RV64-NEXT: sb a1, 8(sp)
-; RV64-NEXT: slli a1, a2, 59
+; RV64-NEXT: vslide1down.vx v10, v8, a1
+; RV64-NEXT: slli a1, a2, 62
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 12(sp)
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 61
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
; RV64-NEXT: slli a1, a2, 60
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 11(sp)
-; RV64-NEXT: slli a1, a2, 61
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 59
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 10(sp)
-; RV64-NEXT: slli a2, a2, 62
-; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 9(sp)
-; RV64-NEXT: addi a1, sp, 8
-; RV64-NEXT: vle8.v v10, (a1)
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: srli a2, a2, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a2
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -162,7 +151,6 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a3)
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%vb = load <6 x i32>, ptr %b
%ahead = insertelement <6 x i32> poison, i32 %a, i32 0
@@ -176,29 +164,27 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vi_v6i32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: lbu a1, 0(a1)
; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: srli a0, a1, 5
-; RV32-NEXT: sb a0, 13(sp)
; RV32-NEXT: andi a0, a1, 1
-; RV32-NEXT: sb a0, 8(sp)
-; RV32-NEXT: slli a0, a1, 27
+; RV32-NEXT: vslide1down.vx v10, v8, a0
+; RV32-NEXT: slli a0, a1, 30
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 29
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: slli a0, a1, 28
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 11(sp)
-; RV32-NEXT: slli a0, a1, 29
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 27
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 10(sp)
-; RV32-NEXT: slli a1, a1, 30
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 9(sp)
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vle8.v v10, (a0)
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: srli a1, a1, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -210,34 +196,31 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vse32.v v10, (a0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vse32.v v8, (a2)
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vi_v6i32:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: lbu a1, 0(a1)
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: srli a0, a1, 5
-; RV64-NEXT: sb a0, 13(sp)
; RV64-NEXT: andi a0, a1, 1
-; RV64-NEXT: sb a0, 8(sp)
-; RV64-NEXT: slli a0, a1, 59
+; RV64-NEXT: vslide1down.vx v10, v8, a0
+; RV64-NEXT: slli a0, a1, 62
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 61
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 12(sp)
+; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: slli a0, a1, 60
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: slli a0, a1, 61
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 59
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: slli a1, a1, 62
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 9(sp)
-; RV64-NEXT: addi a0, sp, 8
-; RV64-NEXT: vle8.v v10, (a0)
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: srli a1, a1, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -248,7 +231,6 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a2)
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%vb = load <6 x i32>, ptr %b
%a = insertelement <6 x i32> poison, i32 -1, i32 0
@@ -263,29 +245,27 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vv_v6f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: lbu a2, 0(a2)
; RV32-NEXT: vle32.v v8, (a1)
-; RV32-NEXT: srli a1, a2, 5
-; RV32-NEXT: sb a1, 13(sp)
; RV32-NEXT: andi a1, a2, 1
-; RV32-NEXT: sb a1, 8(sp)
-; RV32-NEXT: slli a1, a2, 27
+; RV32-NEXT: vslide1down.vx v10, v8, a1
+; RV32-NEXT: slli a1, a2, 30
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 12(sp)
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 29
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
; RV32-NEXT: slli a1, a2, 28
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 11(sp)
-; RV32-NEXT: slli a1, a2, 29
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 27
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 10(sp)
-; RV32-NEXT: slli a2, a2, 30
-; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 9(sp)
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vle8.v v10, (a1)
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: srli a2, a2, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a2
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
@@ -297,34 +277,31 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vse32.v v10, (a0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vse32.v v8, (a3)
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vv_v6f32:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: lbu a2, 0(a2)
; RV64-NEXT: vle32.v v8, (a1)
-; RV64-NEXT: srli a1, a2, 5
-; RV64-NEXT: sb a1, 13(sp)
; RV64-NEXT: andi a1, a2, 1
-; RV64-NEXT: sb a1, 8(sp)
-; RV64-NEXT: slli a1, a2, 59
+; RV64-NEXT: vslide1down.vx v10, v8, a1
+; RV64-NEXT: slli a1, a2, 62
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 12(sp)
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 61
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
; RV64-NEXT: slli a1, a2, 60
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 11(sp)
-; RV64-NEXT: slli a1, a2, 61
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 59
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 10(sp)
-; RV64-NEXT: slli a2, a2, 62
-; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 9(sp)
-; RV64-NEXT: addi a1, sp, 8
-; RV64-NEXT: vle8.v v10, (a1)
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: srli a2, a2, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a2
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
@@ -335,7 +312,6 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a3)
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%va = load <6 x float>, ptr %a
%vb = load <6 x float>, ptr %b
@@ -348,29 +324,27 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vx_v6f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: lbu a1, 0(a1)
; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: srli a0, a1, 5
-; RV32-NEXT: sb a0, 13(sp)
; RV32-NEXT: andi a0, a1, 1
-; RV32-NEXT: sb a0, 8(sp)
-; RV32-NEXT: slli a0, a1, 27
+; RV32-NEXT: vslide1down.vx v10, v8, a0
+; RV32-NEXT: slli a0, a1, 30
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 29
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: slli a0, a1, 28
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 11(sp)
-; RV32-NEXT: slli a0, a1, 29
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 27
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 10(sp)
-; RV32-NEXT: slli a1, a1, 30
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 9(sp)
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vle8.v v10, (a0)
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: srli a1, a1, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -382,34 +356,31 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vse32.v v10, (a0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vse32.v v8, (a2)
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vx_v6f32:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: lbu a1, 0(a1)
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: srli a0, a1, 5
-; RV64-NEXT: sb a0, 13(sp)
; RV64-NEXT: andi a0, a1, 1
-; RV64-NEXT: sb a0, 8(sp)
-; RV64-NEXT: slli a0, a1, 59
+; RV64-NEXT: vslide1down.vx v10, v8, a0
+; RV64-NEXT: slli a0, a1, 62
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 12(sp)
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 61
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: slli a0, a1, 60
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: slli a0, a1, 61
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 59
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: slli a1, a1, 62
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 9(sp)
-; RV64-NEXT: addi a0, sp, 8
-; RV64-NEXT: vle8.v v10, (a0)
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: srli a1, a1, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -420,7 +391,6 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a2)
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%vb = load <6 x float>, ptr %b
%ahead = insertelement <6 x float> poison, float %a, i32 0
@@ -434,29 +404,27 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vfpzero_v6f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: lbu a1, 0(a1)
; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: srli a0, a1, 5
-; RV32-NEXT: sb a0, 13(sp)
; RV32-NEXT: andi a0, a1, 1
-; RV32-NEXT: sb a0, 8(sp)
-; RV32-NEXT: slli a0, a1, 27
+; RV32-NEXT: vslide1down.vx v10, v8, a0
+; RV32-NEXT: slli a0, a1, 30
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 29
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: slli a0, a1, 28
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 11(sp)
-; RV32-NEXT: slli a0, a1, 29
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 27
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 10(sp)
-; RV32-NEXT: slli a1, a1, 30
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 9(sp)
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vle8.v v10, (a0)
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: srli a1, a1, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -468,34 +436,31 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vse32.v v10, (a0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vse32.v v8, (a2)
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vfpzero_v6f32:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: lbu a1, 0(a1)
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: srli a0, a1, 5
-; RV64-NEXT: sb a0, 13(sp)
; RV64-NEXT: andi a0, a1, 1
-; RV64-NEXT: sb a0, 8(sp)
-; RV64-NEXT: slli a0, a1, 59
+; RV64-NEXT: vslide1down.vx v10, v8, a0
+; RV64-NEXT: slli a0, a1, 62
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 61
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 12(sp)
+; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: slli a0, a1, 60
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: slli a0, a1, 61
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 59
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: slli a1, a1, 62
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 9(sp)
-; RV64-NEXT: addi a0, sp, 8
-; RV64-NEXT: vle8.v v10, (a0)
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: srli a1, a1, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -506,7 +471,6 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a2)
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%vb = load <6 x float>, ptr %b
%a = insertelement <6 x float> poison, float 0.0, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index 1c8463978cf2..67eb190e8cb3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -7,242 +7,230 @@
define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) {
; RV32-LABEL: vector_deinterleave_v16i1_v32i1:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vfirst.m a0, v0
; RV32-NEXT: seqz a0, a0
-; RV32-NEXT: sb a0, 16(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV32-NEXT: vmv.x.s a0, v0
-; RV32-NEXT: slli a1, a0, 17
+; RV32-NEXT: slli a1, a0, 29
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 23(sp)
-; RV32-NEXT: slli a1, a0, 19
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: slli a1, a0, 27
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 22(sp)
-; RV32-NEXT: slli a1, a0, 21
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: slli a1, a0, 25
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 21(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: slli a1, a0, 23
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 20(sp)
-; RV32-NEXT: slli a1, a0, 25
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: slli a1, a0, 21
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 19(sp)
-; RV32-NEXT: slli a1, a0, 27
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: slli a1, a0, 19
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 18(sp)
-; RV32-NEXT: slli a1, a0, 29
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: slli a1, a0, 17
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 17(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v0, 2
+; RV32-NEXT: vslidedown.vi v9, v0, 2
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vfirst.m a1, v8
+; RV32-NEXT: vfirst.m a1, v9
; RV32-NEXT: seqz a1, a1
-; RV32-NEXT: sb a1, 24(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: slli a2, a1, 17
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: slli a2, a1, 29
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 31(sp)
-; RV32-NEXT: slli a2, a1, 19
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 27
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 30(sp)
-; RV32-NEXT: slli a2, a1, 21
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 25
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 29(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: slli a2, a1, 23
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 28(sp)
-; RV32-NEXT: slli a2, a1, 25
-; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 27(sp)
-; RV32-NEXT: slli a2, a1, 27
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 21
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 26(sp)
-; RV32-NEXT: slli a2, a1, 29
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 19
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 25(sp)
-; RV32-NEXT: slli a2, a0, 16
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 17
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 7(sp)
-; RV32-NEXT: slli a2, a0, 18
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vand.vi v8, v8, 1
+; RV32-NEXT: vmsne.vi v0, v8, 0
+; RV32-NEXT: slli a2, a0, 30
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 6(sp)
-; RV32-NEXT: slli a2, a0, 20
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 28
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 5(sp)
-; RV32-NEXT: slli a2, a0, 22
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 26
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 4(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: slli a2, a0, 24
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 3(sp)
-; RV32-NEXT: slli a2, a0, 26
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 22
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 2(sp)
-; RV32-NEXT: slli a2, a0, 28
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 20
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 1(sp)
-; RV32-NEXT: slli a0, a0, 30
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 18
+; RV32-NEXT: srli a2, a2, 31
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 0(sp)
-; RV32-NEXT: slli a0, a1, 16
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: slli a0, a1, 30
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: slli a0, a1, 18
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: slli a0, a1, 20
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: slli a0, a1, 28
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: slli a0, a1, 22
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: slli a0, a1, 26
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: slli a0, a1, 24
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 11(sp)
-; RV32-NEXT: slli a0, a1, 26
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: slli a0, a1, 22
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 10(sp)
-; RV32-NEXT: slli a0, a1, 28
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: slli a0, a1, 20
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: slli a0, a1, 18
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 9(sp)
-; RV32-NEXT: slli a1, a1, 30
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: slli a1, a1, 16
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 8(sp)
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vle8.v v9, (a0)
+; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vand.vi v8, v8, 1
-; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: vand.vi v8, v9, 1
; RV32-NEXT: vmsne.vi v8, v8, 0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vector_deinterleave_v16i1_v32i1:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: .cfi_def_cfa_offset 32
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT: vfirst.m a0, v0
; RV64-NEXT: seqz a0, a0
-; RV64-NEXT: sb a0, 16(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
; RV64-NEXT: vmv.x.s a0, v0
-; RV64-NEXT: slli a1, a0, 49
+; RV64-NEXT: slli a1, a0, 61
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 23(sp)
-; RV64-NEXT: slli a1, a0, 51
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: slli a1, a0, 59
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 22(sp)
-; RV64-NEXT: slli a1, a0, 53
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: slli a1, a0, 57
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 21(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: slli a1, a0, 55
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 20(sp)
-; RV64-NEXT: slli a1, a0, 57
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: slli a1, a0, 53
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 19(sp)
-; RV64-NEXT: slli a1, a0, 59
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: slli a1, a0, 51
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 18(sp)
-; RV64-NEXT: slli a1, a0, 61
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: slli a1, a0, 49
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 17(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v0, 2
+; RV64-NEXT: vslidedown.vi v9, v0, 2
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vfirst.m a1, v8
+; RV64-NEXT: vfirst.m a1, v9
; RV64-NEXT: seqz a1, a1
-; RV64-NEXT: sb a1, 24(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
-; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: slli a2, a1, 49
+; RV64-NEXT: vmv.x.s a1, v9
+; RV64-NEXT: slli a2, a1, 61
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 31(sp)
-; RV64-NEXT: slli a2, a1, 51
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 59
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 30(sp)
-; RV64-NEXT: slli a2, a1, 53
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 57
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 29(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: slli a2, a1, 55
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 28(sp)
-; RV64-NEXT: slli a2, a1, 57
-; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 27(sp)
-; RV64-NEXT: slli a2, a1, 59
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 53
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 26(sp)
-; RV64-NEXT: slli a2, a1, 61
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 51
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 25(sp)
-; RV64-NEXT: slli a2, a0, 48
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 49
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 7(sp)
-; RV64-NEXT: slli a2, a0, 50
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vand.vi v8, v8, 1
+; RV64-NEXT: vmsne.vi v0, v8, 0
+; RV64-NEXT: slli a2, a0, 62
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 6(sp)
-; RV64-NEXT: slli a2, a0, 52
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 60
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 5(sp)
-; RV64-NEXT: slli a2, a0, 54
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 58
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 4(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: slli a2, a0, 56
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 3(sp)
-; RV64-NEXT: slli a2, a0, 58
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 54
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 2(sp)
-; RV64-NEXT: slli a2, a0, 60
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 52
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 1(sp)
-; RV64-NEXT: slli a0, a0, 62
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 50
+; RV64-NEXT: srli a2, a2, 63
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 0(sp)
-; RV64-NEXT: slli a0, a1, 48
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: slli a0, a1, 62
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 15(sp)
-; RV64-NEXT: slli a0, a1, 50
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 14(sp)
-; RV64-NEXT: slli a0, a1, 52
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: slli a0, a1, 60
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 13(sp)
-; RV64-NEXT: slli a0, a1, 54
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: slli a0, a1, 58
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 12(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: slli a0, a1, 56
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: slli a0, a1, 58
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: slli a0, a1, 54
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: slli a0, a1, 60
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: slli a0, a1, 52
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: slli a0, a1, 50
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 9(sp)
-; RV64-NEXT: slli a1, a1, 62
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: slli a1, a1, 48
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 8(sp)
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vle8.v v8, (a0)
-; RV64-NEXT: mv a0, sp
-; RV64-NEXT: vle8.v v9, (a0)
+; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vand.vi v8, v8, 1
-; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: vand.vi v8, v9, 1
; RV64-NEXT: vmsne.vi v8, v8, 0
-; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
%retval = call {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1> %vec)
ret {<16 x i1>, <16 x i1>} %retval
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index aa340952749c..ab2882fe95f9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -7,248 +7,228 @@
define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
; RV32-LABEL: vector_interleave_v32i1_v16i1:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -64
-; RV32-NEXT: .cfi_def_cfa_offset 64
-; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 64
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: andi sp, sp, -32
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vfirst.m a0, v8
-; RV32-NEXT: seqz a0, a0
-; RV32-NEXT: sb a0, 1(sp)
; RV32-NEXT: vfirst.m a0, v0
; RV32-NEXT: seqz a0, a0
-; RV32-NEXT: sb a0, 0(sp)
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v8, a0
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vfirst.m a0, v8
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: slli a1, a0, 16
+; RV32-NEXT: vmv.x.s a0, v0
+; RV32-NEXT: slli a1, a0, 30
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 31(sp)
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: slli a2, a1, 16
-; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 30(sp)
-; RV32-NEXT: slli a2, a0, 17
+; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: slli a3, a1, 30
+; RV32-NEXT: srli a3, a3, 31
+; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v10, a3
+; RV32-NEXT: slli a2, a0, 29
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 29(sp)
-; RV32-NEXT: slli a2, a1, 17
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 29
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 28(sp)
-; RV32-NEXT: slli a2, a0, 18
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 28
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 27(sp)
-; RV32-NEXT: slli a2, a1, 18
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 28
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 26(sp)
-; RV32-NEXT: slli a2, a0, 19
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 27
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 25(sp)
-; RV32-NEXT: slli a2, a1, 19
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 27
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 24(sp)
-; RV32-NEXT: slli a2, a0, 20
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 26
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 23(sp)
-; RV32-NEXT: slli a2, a1, 20
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 26
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 22(sp)
-; RV32-NEXT: slli a2, a0, 21
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 25
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 21(sp)
-; RV32-NEXT: slli a2, a1, 21
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 25
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 20(sp)
-; RV32-NEXT: slli a2, a0, 22
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 24
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 19(sp)
-; RV32-NEXT: slli a2, a1, 22
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 24
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 18(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: slli a2, a0, 23
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 17(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: slli a2, a1, 23
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 16(sp)
-; RV32-NEXT: slli a2, a0, 24
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 22
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 15(sp)
-; RV32-NEXT: slli a2, a1, 24
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 22
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 14(sp)
-; RV32-NEXT: slli a2, a0, 25
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 21
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 13(sp)
-; RV32-NEXT: slli a2, a1, 25
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 21
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 12(sp)
-; RV32-NEXT: slli a2, a0, 26
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 20
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 11(sp)
-; RV32-NEXT: slli a2, a1, 26
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 20
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 10(sp)
-; RV32-NEXT: slli a2, a0, 27
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 19
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 9(sp)
-; RV32-NEXT: slli a2, a1, 27
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 19
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 8(sp)
-; RV32-NEXT: slli a2, a0, 28
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 18
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 7(sp)
-; RV32-NEXT: slli a2, a1, 28
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 18
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 6(sp)
-; RV32-NEXT: slli a2, a0, 29
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a0, 17
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 5(sp)
-; RV32-NEXT: slli a2, a1, 29
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a2, a1, 17
; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: sb a2, 4(sp)
-; RV32-NEXT: slli a0, a0, 30
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: sb a0, 3(sp)
-; RV32-NEXT: slli a1, a1, 30
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: slli a1, a1, 16
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 2(sp)
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV32-NEXT: vle8.v v8, (a1)
+; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vand.vi v8, v8, 1
; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: addi sp, s0, -64
-; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: ret
;
; RV64-LABEL: vector_interleave_v32i1_v16i1:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -64
-; RV64-NEXT: .cfi_def_cfa_offset 64
-; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 64
-; RV64-NEXT: .cfi_def_cfa s0, 0
-; RV64-NEXT: andi sp, sp, -32
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vfirst.m a0, v8
-; RV64-NEXT: seqz a0, a0
-; RV64-NEXT: sb a0, 1(sp)
; RV64-NEXT: vfirst.m a0, v0
; RV64-NEXT: seqz a0, a0
-; RV64-NEXT: sb a0, 0(sp)
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v8, a0
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vfirst.m a0, v8
+; RV64-NEXT: seqz a0, a0
+; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: slli a1, a0, 48
+; RV64-NEXT: vmv.x.s a0, v0
+; RV64-NEXT: slli a1, a0, 62
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 31(sp)
-; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: slli a2, a1, 48
-; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 30(sp)
-; RV64-NEXT: slli a2, a0, 49
+; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: vsetivli zero, 0, e16, mf4, ta, ma
+; RV64-NEXT: vmv.x.s a1, v8
+; RV64-NEXT: slli a3, a1, 62
+; RV64-NEXT: srli a3, a3, 63
+; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v10, a3
+; RV64-NEXT: slli a2, a0, 61
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 29(sp)
-; RV64-NEXT: slli a2, a1, 49
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 61
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 28(sp)
-; RV64-NEXT: slli a2, a0, 50
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 60
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 27(sp)
-; RV64-NEXT: slli a2, a1, 50
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 60
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 26(sp)
-; RV64-NEXT: slli a2, a0, 51
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 59
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 25(sp)
-; RV64-NEXT: slli a2, a1, 51
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 59
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 24(sp)
-; RV64-NEXT: slli a2, a0, 52
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 58
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 23(sp)
-; RV64-NEXT: slli a2, a1, 52
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 58
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 22(sp)
-; RV64-NEXT: slli a2, a0, 53
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 57
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 21(sp)
-; RV64-NEXT: slli a2, a1, 53
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 57
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 20(sp)
-; RV64-NEXT: slli a2, a0, 54
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 56
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 19(sp)
-; RV64-NEXT: slli a2, a1, 54
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 56
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 18(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: slli a2, a0, 55
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 17(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: slli a2, a1, 55
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 16(sp)
-; RV64-NEXT: slli a2, a0, 56
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 54
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 15(sp)
-; RV64-NEXT: slli a2, a1, 56
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 54
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 14(sp)
-; RV64-NEXT: slli a2, a0, 57
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 53
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 13(sp)
-; RV64-NEXT: slli a2, a1, 57
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 53
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 12(sp)
-; RV64-NEXT: slli a2, a0, 58
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 52
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 11(sp)
-; RV64-NEXT: slli a2, a1, 58
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 52
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 10(sp)
-; RV64-NEXT: slli a2, a0, 59
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 51
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 9(sp)
-; RV64-NEXT: slli a2, a1, 59
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 51
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 8(sp)
-; RV64-NEXT: slli a2, a0, 60
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 50
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 7(sp)
-; RV64-NEXT: slli a2, a1, 60
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 50
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 6(sp)
-; RV64-NEXT: slli a2, a0, 61
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a0, 49
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 5(sp)
-; RV64-NEXT: slli a2, a1, 61
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a2, a1, 49
; RV64-NEXT: srli a2, a2, 63
-; RV64-NEXT: sb a2, 4(sp)
-; RV64-NEXT: slli a0, a0, 62
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 3(sp)
-; RV64-NEXT: slli a1, a1, 62
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: slli a1, a1, 48
; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sb a1, 2(sp)
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: mv a1, sp
-; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; RV64-NEXT: vle8.v v8, (a1)
+; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vand.vi v8, v8, 1
; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: addi sp, s0, -64
-; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: ret
%res = call <32 x i1> @llvm.experimental.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b)
ret <32 x i1> %res
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index fc7f1f588369..34210dab3854 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -597,60 +597,69 @@ define void @test_srem_vec(ptr %X) nounwind {
;
; RV32MV-LABEL: test_srem_vec:
; RV32MV: # %bb.0:
-; RV32MV-NEXT: addi sp, sp, -64
-; RV32MV-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s2, 52(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s3, 48(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s4, 44(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s5, 40(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s6, 36(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: addi s0, sp, 64
-; RV32MV-NEXT: andi sp, sp, -32
-; RV32MV-NEXT: mv s2, a0
-; RV32MV-NEXT: lw a0, 8(a0)
-; RV32MV-NEXT: lw a1, 4(s2)
-; RV32MV-NEXT: lbu a2, 12(s2)
-; RV32MV-NEXT: slli a3, a0, 31
-; RV32MV-NEXT: srli a4, a1, 1
-; RV32MV-NEXT: or s3, a4, a3
-; RV32MV-NEXT: slli a3, a2, 30
-; RV32MV-NEXT: srli a4, a0, 2
-; RV32MV-NEXT: or s4, a4, a3
-; RV32MV-NEXT: srli a0, a0, 1
+; RV32MV-NEXT: addi sp, sp, -48
+; RV32MV-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: csrr a1, vlenb
+; RV32MV-NEXT: slli a1, a1, 1
+; RV32MV-NEXT: sub sp, sp, a1
+; RV32MV-NEXT: mv s0, a0
+; RV32MV-NEXT: lbu a0, 12(a0)
+; RV32MV-NEXT: lw a1, 8(s0)
+; RV32MV-NEXT: slli a2, a0, 30
+; RV32MV-NEXT: lw a3, 4(s0)
+; RV32MV-NEXT: srli s1, a1, 2
+; RV32MV-NEXT: or s1, s1, a2
+; RV32MV-NEXT: slli a2, a1, 31
+; RV32MV-NEXT: srli a4, a3, 1
+; RV32MV-NEXT: or s2, a4, a2
+; RV32MV-NEXT: srli a0, a0, 2
; RV32MV-NEXT: slli a0, a0, 31
-; RV32MV-NEXT: srai s5, a0, 31
-; RV32MV-NEXT: srli a2, a2, 2
-; RV32MV-NEXT: slli a2, a2, 31
-; RV32MV-NEXT: lw a0, 0(s2)
-; RV32MV-NEXT: srai s6, a2, 31
+; RV32MV-NEXT: srai s3, a0, 31
+; RV32MV-NEXT: srli a1, a1, 1
; RV32MV-NEXT: slli a1, a1, 31
+; RV32MV-NEXT: lw a0, 0(s0)
+; RV32MV-NEXT: srai s4, a1, 31
+; RV32MV-NEXT: slli a1, a3, 31
; RV32MV-NEXT: srai a1, a1, 31
; RV32MV-NEXT: li a2, 6
; RV32MV-NEXT: li a3, 0
; RV32MV-NEXT: call __moddi3@plt
-; RV32MV-NEXT: sw a1, 4(sp)
-; RV32MV-NEXT: sw a0, 0(sp)
-; RV32MV-NEXT: li a2, -5
-; RV32MV-NEXT: li a3, -1
-; RV32MV-NEXT: mv a0, s4
-; RV32MV-NEXT: mv a1, s6
-; RV32MV-NEXT: call __moddi3@plt
-; RV32MV-NEXT: sw a1, 20(sp)
-; RV32MV-NEXT: sw a0, 16(sp)
+; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32MV-NEXT: vslide1down.vx v8, v8, a0
+; RV32MV-NEXT: vslide1down.vx v8, v8, a1
+; RV32MV-NEXT: addi a0, sp, 16
+; RV32MV-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; RV32MV-NEXT: li a2, 7
-; RV32MV-NEXT: mv a0, s3
-; RV32MV-NEXT: mv a1, s5
+; RV32MV-NEXT: mv a0, s2
+; RV32MV-NEXT: mv a1, s4
; RV32MV-NEXT: li a3, 0
; RV32MV-NEXT: call __moddi3@plt
-; RV32MV-NEXT: sw a1, 12(sp)
-; RV32MV-NEXT: sw a0, 8(sp)
+; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32MV-NEXT: addi a2, sp, 16
+; RV32MV-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32MV-NEXT: vslide1down.vx v8, v8, a0
+; RV32MV-NEXT: vslide1down.vx v8, v8, a1
+; RV32MV-NEXT: addi a0, sp, 16
+; RV32MV-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32MV-NEXT: li a2, -5
+; RV32MV-NEXT: li a3, -1
+; RV32MV-NEXT: mv a0, s1
+; RV32MV-NEXT: mv a1, s3
+; RV32MV-NEXT: call __moddi3@plt
+; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32MV-NEXT: addi a2, sp, 16
+; RV32MV-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32MV-NEXT: vslide1down.vx v8, v8, a0
+; RV32MV-NEXT: vslide1down.vx v8, v8, a1
+; RV32MV-NEXT: vslide1down.vx v8, v8, a0
+; RV32MV-NEXT: vslide1down.vx v8, v8, a0
; RV32MV-NEXT: li a0, 85
-; RV32MV-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV32MV-NEXT: vmv.s.x v0, a0
-; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT: mv a0, sp
-; RV32MV-NEXT: vle32.v v8, (a0)
; RV32MV-NEXT: vmv.v.i v10, 1
; RV32MV-NEXT: vmerge.vim v10, v10, -1, v0
; RV32MV-NEXT: vand.vv v8, v8, v10
@@ -668,14 +677,14 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: vmv.v.i v8, 0
; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0
; RV32MV-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32MV-NEXT: vse32.v v8, (s2)
+; RV32MV-NEXT: vse32.v v8, (s0)
; RV32MV-NEXT: vslidedown.vi v10, v8, 1
; RV32MV-NEXT: vmv.x.s a0, v10
; RV32MV-NEXT: vslidedown.vi v10, v8, 2
; RV32MV-NEXT: vmv.x.s a1, v10
; RV32MV-NEXT: slli a2, a1, 1
; RV32MV-NEXT: sub a2, a2, a0
-; RV32MV-NEXT: sw a2, 4(s2)
+; RV32MV-NEXT: sw a2, 4(s0)
; RV32MV-NEXT: vslidedown.vi v10, v8, 4
; RV32MV-NEXT: vmv.x.s a0, v10
; RV32MV-NEXT: srli a2, a0, 30
@@ -684,7 +693,7 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: slli a3, a3, 2
; RV32MV-NEXT: or a2, a3, a2
; RV32MV-NEXT: andi a2, a2, 7
-; RV32MV-NEXT: sb a2, 12(s2)
+; RV32MV-NEXT: sb a2, 12(s0)
; RV32MV-NEXT: srli a1, a1, 31
; RV32MV-NEXT: vslidedown.vi v8, v8, 3
; RV32MV-NEXT: vmv.x.s a2, v8
@@ -693,70 +702,65 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: slli a0, a0, 2
; RV32MV-NEXT: or a0, a1, a0
; RV32MV-NEXT: or a0, a0, a2
-; RV32MV-NEXT: sw a0, 8(s2)
-; RV32MV-NEXT: addi sp, s0, -64
-; RV32MV-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s2, 52(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s3, 48(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s4, 44(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s5, 40(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s6, 36(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: addi sp, sp, 64
+; RV32MV-NEXT: sw a0, 8(s0)
+; RV32MV-NEXT: csrr a0, vlenb
+; RV32MV-NEXT: slli a0, a0, 1
+; RV32MV-NEXT: add sp, sp, a0
+; RV32MV-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: addi sp, sp, 48
; RV32MV-NEXT: ret
;
; RV64MV-LABEL: test_srem_vec:
; RV64MV: # %bb.0:
-; RV64MV-NEXT: addi sp, sp, -64
-; RV64MV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64MV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64MV-NEXT: addi s0, sp, 64
-; RV64MV-NEXT: andi sp, sp, -32
-; RV64MV-NEXT: lbu a1, 12(a0)
+; RV64MV-NEXT: ld a1, 0(a0)
; RV64MV-NEXT: lwu a2, 8(a0)
-; RV64MV-NEXT: slli a1, a1, 32
-; RV64MV-NEXT: ld a3, 0(a0)
-; RV64MV-NEXT: or a1, a2, a1
-; RV64MV-NEXT: slli a1, a1, 29
-; RV64MV-NEXT: srai a1, a1, 31
-; RV64MV-NEXT: srli a4, a3, 2
-; RV64MV-NEXT: slli a2, a2, 62
-; RV64MV-NEXT: lui a5, %hi(.LCPI3_0)
-; RV64MV-NEXT: ld a5, %lo(.LCPI3_0)(a5)
-; RV64MV-NEXT: or a2, a2, a4
-; RV64MV-NEXT: slli a3, a3, 31
+; RV64MV-NEXT: srli a3, a1, 2
+; RV64MV-NEXT: lbu a4, 12(a0)
+; RV64MV-NEXT: slli a5, a2, 62
+; RV64MV-NEXT: or a3, a5, a3
; RV64MV-NEXT: srai a3, a3, 31
-; RV64MV-NEXT: mulh a4, a3, a5
+; RV64MV-NEXT: slli a4, a4, 32
+; RV64MV-NEXT: or a2, a2, a4
+; RV64MV-NEXT: slli a2, a2, 29
+; RV64MV-NEXT: lui a4, %hi(.LCPI3_0)
+; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4)
+; RV64MV-NEXT: srai a2, a2, 31
+; RV64MV-NEXT: slli a1, a1, 31
+; RV64MV-NEXT: srai a1, a1, 31
+; RV64MV-NEXT: mulh a4, a2, a4
; RV64MV-NEXT: srli a5, a4, 63
+; RV64MV-NEXT: srai a4, a4, 1
; RV64MV-NEXT: add a4, a4, a5
-; RV64MV-NEXT: li a5, 6
-; RV64MV-NEXT: mul a4, a4, a5
; RV64MV-NEXT: lui a5, %hi(.LCPI3_1)
; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5)
-; RV64MV-NEXT: srai a2, a2, 31
-; RV64MV-NEXT: sub a3, a3, a4
-; RV64MV-NEXT: sd a3, 0(sp)
-; RV64MV-NEXT: mulh a3, a2, a5
-; RV64MV-NEXT: srli a4, a3, 63
-; RV64MV-NEXT: srai a3, a3, 1
-; RV64MV-NEXT: add a3, a3, a4
-; RV64MV-NEXT: slli a4, a3, 3
+; RV64MV-NEXT: add a2, a2, a4
+; RV64MV-NEXT: slli a4, a4, 2
+; RV64MV-NEXT: add a2, a2, a4
+; RV64MV-NEXT: mulh a4, a3, a5
+; RV64MV-NEXT: srli a5, a4, 63
+; RV64MV-NEXT: srai a4, a4, 1
+; RV64MV-NEXT: add a4, a4, a5
; RV64MV-NEXT: lui a5, %hi(.LCPI3_2)
; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5)
-; RV64MV-NEXT: add a2, a2, a3
-; RV64MV-NEXT: sub a2, a2, a4
-; RV64MV-NEXT: sd a2, 8(sp)
-; RV64MV-NEXT: mulh a2, a1, a5
-; RV64MV-NEXT: srli a3, a2, 63
-; RV64MV-NEXT: srai a2, a2, 1
-; RV64MV-NEXT: add a2, a2, a3
-; RV64MV-NEXT: slli a3, a2, 2
-; RV64MV-NEXT: add a1, a1, a2
-; RV64MV-NEXT: add a1, a1, a3
-; RV64MV-NEXT: sd a1, 16(sp)
-; RV64MV-NEXT: mv a1, sp
+; RV64MV-NEXT: add a3, a3, a4
+; RV64MV-NEXT: slli a4, a4, 3
+; RV64MV-NEXT: sub a3, a3, a4
+; RV64MV-NEXT: mulh a4, a1, a5
+; RV64MV-NEXT: srli a5, a4, 63
+; RV64MV-NEXT: add a4, a4, a5
+; RV64MV-NEXT: li a5, 6
+; RV64MV-NEXT: mul a4, a4, a5
+; RV64MV-NEXT: sub a1, a1, a4
; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64MV-NEXT: vle64.v v8, (a1)
+; RV64MV-NEXT: vslide1down.vx v8, v8, a1
+; RV64MV-NEXT: vslide1down.vx v8, v8, a3
+; RV64MV-NEXT: vslide1down.vx v8, v8, a2
+; RV64MV-NEXT: vslide1down.vx v8, v8, a0
; RV64MV-NEXT: lui a1, %hi(.LCPI3_3)
; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_3)
; RV64MV-NEXT: vle64.v v10, (a1)
@@ -784,10 +788,6 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64MV-NEXT: srli a3, a3, 62
; RV64MV-NEXT: or a2, a3, a2
; RV64MV-NEXT: sw a2, 8(a0)
-; RV64MV-NEXT: addi sp, s0, -64
-; RV64MV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64MV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64MV-NEXT: addi sp, sp, 64
; RV64MV-NEXT: ret
%ld = load <3 x i33>, ptr %X
%srem = srem <3 x i33> %ld, <i33 6, i33 7, i33 -5>
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 4c0a776b5ee8..adce58ccc801 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -521,43 +521,41 @@ define void @test_urem_vec(ptr %X) nounwind {
;
; RV32MV-LABEL: test_urem_vec:
; RV32MV: # %bb.0:
-; RV32MV-NEXT: addi sp, sp, -16
; RV32MV-NEXT: lw a1, 0(a0)
; RV32MV-NEXT: andi a2, a1, 2047
-; RV32MV-NEXT: sh a2, 8(sp)
-; RV32MV-NEXT: slli a2, a1, 10
-; RV32MV-NEXT: srli a2, a2, 21
-; RV32MV-NEXT: sh a2, 10(sp)
+; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32MV-NEXT: vslide1down.vx v8, v8, a2
; RV32MV-NEXT: lbu a2, 4(a0)
+; RV32MV-NEXT: slli a3, a1, 10
+; RV32MV-NEXT: srli a3, a3, 21
+; RV32MV-NEXT: vslide1down.vx v8, v8, a3
; RV32MV-NEXT: slli a2, a2, 10
; RV32MV-NEXT: srli a1, a1, 22
; RV32MV-NEXT: or a1, a1, a2
; RV32MV-NEXT: andi a1, a1, 2047
-; RV32MV-NEXT: sh a1, 12(sp)
-; RV32MV-NEXT: addi a1, sp, 8
-; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32MV-NEXT: vle16.v v8, (a1)
-; RV32MV-NEXT: vmv.v.i v9, 10
+; RV32MV-NEXT: vslide1down.vx v8, v8, a1
+; RV32MV-NEXT: lui a1, %hi(.LCPI4_0)
+; RV32MV-NEXT: addi a1, a1, %lo(.LCPI4_0)
+; RV32MV-NEXT: vle16.v v9, (a1)
+; RV32MV-NEXT: vslide1down.vx v8, v8, a0
+; RV32MV-NEXT: vid.v v10
+; RV32MV-NEXT: vsub.vv v8, v8, v10
+; RV32MV-NEXT: vmul.vv v8, v8, v9
+; RV32MV-NEXT: vadd.vv v9, v8, v8
+; RV32MV-NEXT: vmv.v.i v10, 10
; RV32MV-NEXT: li a1, 9
; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
-; RV32MV-NEXT: vmv.s.x v9, a1
+; RV32MV-NEXT: vmv.s.x v10, a1
; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32MV-NEXT: lui a1, %hi(.LCPI4_0)
-; RV32MV-NEXT: addi a1, a1, %lo(.LCPI4_0)
-; RV32MV-NEXT: vle16.v v10, (a1)
-; RV32MV-NEXT: vid.v v11
-; RV32MV-NEXT: vsub.vv v8, v8, v11
-; RV32MV-NEXT: vmul.vv v8, v8, v10
-; RV32MV-NEXT: vadd.vv v10, v8, v8
-; RV32MV-NEXT: vsll.vv v9, v10, v9
+; RV32MV-NEXT: vsll.vv v9, v9, v10
+; RV32MV-NEXT: li a1, 2047
+; RV32MV-NEXT: vand.vx v8, v8, a1
; RV32MV-NEXT: vmv.v.i v10, 0
-; RV32MV-NEXT: li a1, 1
+; RV32MV-NEXT: li a2, 1
; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
; RV32MV-NEXT: vmv1r.v v11, v10
-; RV32MV-NEXT: vmv.s.x v11, a1
-; RV32MV-NEXT: li a1, 2047
+; RV32MV-NEXT: vmv.s.x v11, a2
; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32MV-NEXT: vand.vx v8, v8, a1
; RV32MV-NEXT: lui a2, %hi(.LCPI4_1)
; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_1)
; RV32MV-NEXT: vle16.v v12, (a2)
@@ -581,47 +579,44 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV32MV-NEXT: or a1, a2, a1
; RV32MV-NEXT: or a1, a1, a3
; RV32MV-NEXT: sw a1, 0(a0)
-; RV32MV-NEXT: addi sp, sp, 16
; RV32MV-NEXT: ret
;
; RV64MV-LABEL: test_urem_vec:
; RV64MV: # %bb.0:
-; RV64MV-NEXT: addi sp, sp, -16
; RV64MV-NEXT: lbu a1, 4(a0)
; RV64MV-NEXT: lwu a2, 0(a0)
; RV64MV-NEXT: slli a1, a1, 32
; RV64MV-NEXT: or a1, a2, a1
-; RV64MV-NEXT: srli a2, a1, 22
-; RV64MV-NEXT: sh a2, 12(sp)
; RV64MV-NEXT: andi a2, a1, 2047
-; RV64MV-NEXT: sh a2, 8(sp)
-; RV64MV-NEXT: slli a1, a1, 42
-; RV64MV-NEXT: srli a1, a1, 53
-; RV64MV-NEXT: sh a1, 10(sp)
-; RV64MV-NEXT: addi a1, sp, 8
; RV64MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64MV-NEXT: vle16.v v8, (a1)
-; RV64MV-NEXT: vmv.v.i v9, 10
+; RV64MV-NEXT: vslide1down.vx v8, v8, a2
+; RV64MV-NEXT: slli a2, a1, 42
+; RV64MV-NEXT: srli a2, a2, 53
+; RV64MV-NEXT: vslide1down.vx v8, v8, a2
+; RV64MV-NEXT: srli a1, a1, 22
+; RV64MV-NEXT: vslide1down.vx v8, v8, a1
+; RV64MV-NEXT: lui a1, %hi(.LCPI4_0)
+; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_0)
+; RV64MV-NEXT: vle16.v v9, (a1)
+; RV64MV-NEXT: vslide1down.vx v8, v8, a0
+; RV64MV-NEXT: vid.v v10
+; RV64MV-NEXT: vsub.vv v8, v8, v10
+; RV64MV-NEXT: vmul.vv v8, v8, v9
+; RV64MV-NEXT: vadd.vv v9, v8, v8
+; RV64MV-NEXT: vmv.v.i v10, 10
; RV64MV-NEXT: li a1, 9
; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
-; RV64MV-NEXT: vmv.s.x v9, a1
+; RV64MV-NEXT: vmv.s.x v10, a1
; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64MV-NEXT: lui a1, %hi(.LCPI4_0)
-; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_0)
-; RV64MV-NEXT: vle16.v v10, (a1)
-; RV64MV-NEXT: vid.v v11
-; RV64MV-NEXT: vsub.vv v8, v8, v11
-; RV64MV-NEXT: vmul.vv v8, v8, v10
-; RV64MV-NEXT: vadd.vv v10, v8, v8
-; RV64MV-NEXT: vsll.vv v9, v10, v9
+; RV64MV-NEXT: vsll.vv v9, v9, v10
+; RV64MV-NEXT: li a1, 2047
+; RV64MV-NEXT: vand.vx v8, v8, a1
; RV64MV-NEXT: vmv.v.i v10, 0
-; RV64MV-NEXT: li a1, 1
+; RV64MV-NEXT: li a2, 1
; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
; RV64MV-NEXT: vmv1r.v v11, v10
-; RV64MV-NEXT: vmv.s.x v11, a1
-; RV64MV-NEXT: li a1, 2047
+; RV64MV-NEXT: vmv.s.x v11, a2
; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64MV-NEXT: vand.vx v8, v8, a1
; RV64MV-NEXT: lui a2, %hi(.LCPI4_1)
; RV64MV-NEXT: addi a2, a2, %lo(.LCPI4_1)
; RV64MV-NEXT: vle16.v v12, (a2)
@@ -645,7 +640,6 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64MV-NEXT: slli a1, a1, 31
; RV64MV-NEXT: srli a1, a1, 63
; RV64MV-NEXT: sb a1, 4(a0)
-; RV64MV-NEXT: addi sp, sp, 16
; RV64MV-NEXT: ret
%ld = load <3 x i11>, ptr %X
%urem = urem <3 x i11> %ld, <i11 6, i11 7, i11 -5>