[llvm] 7b5cf52 - [RISCV] Improve splatPartsI64WithVL for fixed vector constants where Hi and Lo are the same and the VL is constant.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 29 09:33:31 PDT 2023


Author: Craig Topper
Date: 2023-08-29T09:27:48-07:00
New Revision: 7b5cf52f32c09ff68b7f0ae5775dfbafc51a7907

URL: https://github.com/llvm/llvm-project/commit/7b5cf52f32c09ff68b7f0ae5775dfbafc51a7907
DIFF: https://github.com/llvm/llvm-project/commit/7b5cf52f32c09ff68b7f0ae5775dfbafc51a7907.diff

LOG: [RISCV] Improve splatPartsI64WithVL for fixed vector constants where Hi and Lo are the same and the VL is constant.

If the doubled VL will still fit in a vsetivli, use it. It will be cheap
to change and cheap to change back.

This improves codegen from D158896.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D158896
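
For illustration, here is the effect on RV32 when splatting a 64-bit
constant whose high and low words match (0x3333333333333333, taken from
the test updates below). Before, both words were stored to the stack and
reloaded with a zero-stride vlse64.v:

    lui      a1, 209715
    addi     a1, a1, 819
    sw       a1, 20(sp)
    sw       a1, 16(sp)
    addi     a1, sp, 16
    vsetivli zero, 2, e64, m1, ta, ma
    vlse64.v v10, (a1), zero

After this change, the 32-bit word is materialized in a scalar register
once and splatted directly with a vmv.v.x whose EEW is 32:

    lui      a1, 209715
    addi     a1, a1, 819
    vsetvli  a2, zero, e32, m1, ta, ma
    vmv.v.x  v10, a1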

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index bea3f4d6313b04..6a21aca0700f2b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3559,18 +3559,28 @@ static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
     if ((LoC >> 31) == HiC)
       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
 
-    // If vl is equal to VLMAX and Hi constant is equal to Lo, we could use
-    // vmv.v.x whose EEW = 32 to lower it.
-    if (LoC == HiC && (isAllOnesConstant(VL) ||
-                       (isa<RegisterSDNode>(VL) &&
-                        cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))) {
-      MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
-      // TODO: if vl <= min(VLMAX), we can also do this. But we could not
-      // access the subtarget here now.
-      auto InterVec = DAG.getNode(
-          RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
-                                  DAG.getRegister(RISCV::X0, MVT::i32));
-      return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+    // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
+    // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
+    // vlmax vsetvli or vsetivli to change the VL.
+    // FIXME: Support larger constants?
+    // FIXME: Support non-constant VLs by saturating?
+    if (LoC == HiC) {
+      SDValue NewVL;
+      if (isAllOnesConstant(VL) ||
+          (isa<RegisterSDNode>(VL) &&
+           cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
+        NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
+      else if (isa<ConstantSDNode>(VL) &&
+               isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
+        NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
+
+      if (NewVL) {
+        MVT InterVT =
+            MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
+        auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
+                                    DAG.getUNDEF(InterVT), Lo, NewVL);
+        return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+      }
     }
   }
 

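To spell out the VL arithmetic: the constant-VL path above requires the
original VL to fit in 4 bits (at most 15), so the doubled EEW=32 VL is at
most 30, which still fits in vsetivli's 5-bit unsigned AVL immediate. A
minimal sketch of the intended sequence for a <2 x i64> splat with VL=2
(registers illustrative; in the checked-in tests the vsetvli insertion
pass often widens the splat to a VLMAX vsetvli instead):

    vsetivli zero, 4, e32, m1, ta, ma   # doubled VL still fits the immediate
    vmv.v.x  v10, a1                    # splat the repeated 32-bit word
    vsetivli zero, 2, e64, m1, ta, ma   # cheap to change back for the e64 ops
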
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
index 2a92f30b851714..64304189ac8b51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -1385,22 +1385,10 @@ declare <2 x i64> @llvm.vp.bitreverse.v2i64(<2 x i64>, <2 x i1>, i32)
 define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bitreverse_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw zero, 4(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
 ; RV32-NEXT:    lui a1, 1044480
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
@@ -1414,7 +1402,7 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vand.vx v10, v8, a4, v0.t
 ; RV32-NEXT:    vsll.vi v10, v10, 24, v0.t
-; RV32-NEXT:    mv a5, sp
+; RV32-NEXT:    addi a5, sp, 8
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vlse64.v v11, (a5), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
@@ -1434,33 +1422,36 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
 ; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
 ; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_bitreverse_v2i64:
@@ -1529,22 +1520,10 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e
 define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bitreverse_v2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw zero, 4(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
 ; RV32-NEXT:    lui a1, 1044480
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
@@ -1558,7 +1537,7 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl)
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vand.vx v10, v8, a4
 ; RV32-NEXT:    vsll.vi v10, v10, 24
-; RV32-NEXT:    mv a5, sp
+; RV32-NEXT:    addi a5, sp, 8
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vlse64.v v11, (a5), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
@@ -1578,33 +1557,36 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl)
 ; RV32-NEXT:    vor.vv v8, v8, v10
 ; RV32-NEXT:    vor.vv v8, v9, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vand.vv v8, v8, v10
 ; RV32-NEXT:    vsll.vi v8, v8, 4
 ; RV32-NEXT:    vor.vv v8, v9, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vand.vv v8, v8, v10
 ; RV32-NEXT:    vsll.vi v8, v8, 2
 ; RV32-NEXT:    vor.vv v8, v9, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vand.vv v8, v8, v10
 ; RV32-NEXT:    vadd.vv v8, v8, v8
 ; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_bitreverse_v2i64_unmasked:
@@ -1677,22 +1659,10 @@ declare <4 x i64> @llvm.vp.bitreverse.v4i64(<4 x i64>, <4 x i1>, i32)
 define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bitreverse_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw zero, 4(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
 ; RV32-NEXT:    lui a1, 1044480
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
@@ -1706,7 +1676,7 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vand.vx v12, v8, a4, v0.t
 ; RV32-NEXT:    vsll.vi v12, v12, 24, v0.t
-; RV32-NEXT:    mv a5, sp
+; RV32-NEXT:    addi a5, sp, 8
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vlse64.v v14, (a5), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
@@ -1726,33 +1696,36 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e
 ; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
 ; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
 ; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_bitreverse_v4i64:
@@ -1821,22 +1794,10 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e
 define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bitreverse_v4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw zero, 4(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
 ; RV32-NEXT:    lui a1, 1044480
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
@@ -1850,7 +1811,7 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl)
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vand.vx v12, v8, a4
 ; RV32-NEXT:    vsll.vi v12, v12, 24
-; RV32-NEXT:    mv a5, sp
+; RV32-NEXT:    addi a5, sp, 8
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vlse64.v v14, (a5), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
@@ -1870,33 +1831,36 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl)
 ; RV32-NEXT:    vor.vv v8, v8, v12
 ; RV32-NEXT:    vor.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vand.vv v8, v8, v12
 ; RV32-NEXT:    vsll.vi v8, v8, 4
 ; RV32-NEXT:    vor.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vand.vv v8, v8, v12
 ; RV32-NEXT:    vsll.vi v8, v8, 2
 ; RV32-NEXT:    vor.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vand.vv v8, v8, v12
 ; RV32-NEXT:    vadd.vv v8, v8, v8
 ; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_bitreverse_v4i64_unmasked:
@@ -1969,22 +1933,10 @@ declare <8 x i64> @llvm.vp.bitreverse.v8i64(<8 x i64>, <8 x i1>, i32)
 define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bitreverse_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw zero, 4(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
 ; RV32-NEXT:    lui a1, 1044480
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
@@ -1998,7 +1950,7 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vand.vx v12, v8, a4, v0.t
 ; RV32-NEXT:    vsll.vi v20, v12, 24, v0.t
-; RV32-NEXT:    mv a5, sp
+; RV32-NEXT:    addi a5, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT:    vlse64.v v12, (a5), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
@@ -2018,33 +1970,36 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e
 ; RV32-NEXT:    vor.vv v8, v8, v20, v0.t
 ; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
 ; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
 ; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_bitreverse_v8i64:
@@ -2113,22 +2068,10 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e
 define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bitreverse_v8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw zero, 4(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
 ; RV32-NEXT:    lui a1, 1044480
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
@@ -2142,7 +2085,7 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl)
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vand.vx v16, v8, a4
 ; RV32-NEXT:    vsll.vi v16, v16, 24
-; RV32-NEXT:    mv a5, sp
+; RV32-NEXT:    addi a5, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT:    vlse64.v v20, (a5), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
@@ -2162,33 +2105,36 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl)
 ; RV32-NEXT:    vor.vv v8, v8, v16
 ; RV32-NEXT:    vor.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vand.vv v8, v8, v16
 ; RV32-NEXT:    vsll.vi v8, v8, 4
 ; RV32-NEXT:    vor.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vand.vv v8, v8, v16
 ; RV32-NEXT:    vsll.vi v8, v8, 2
 ; RV32-NEXT:    vor.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vand.vv v8, v8, v16
 ; RV32-NEXT:    vadd.vv v8, v8, v8
 ; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_bitreverse_v8i64_unmasked:

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
index 92ca238e08763a..2daa0e1bcefedc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -181,24 +181,12 @@ declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
 define void @bitreverse_v2i64(ptr %x, ptr %y) {
 ; RV32-LABEL: bitreverse_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    sw zero, 4(sp)
+; RV32-NEXT:    sw zero, 12(sp)
 ; RV32-NEXT:    lui a1, 1044480
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsrl.vx v9, v8, a1
@@ -209,7 +197,7 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) {
 ; RV32-NEXT:    vand.vx v10, v10, a3
 ; RV32-NEXT:    vor.vv v9, v10, v9
 ; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    mv a4, sp
+; RV32-NEXT:    addi a4, sp, 8
 ; RV32-NEXT:    vlse64.v v11, (a4), zero
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vand.vx v10, v10, a4
@@ -226,31 +214,40 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) {
 ; RV32-NEXT:    vand.vv v8, v8, v11
 ; RV32-NEXT:    vsll.vi v8, v8, 8
 ; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vlse64.v v11, (a1), zero
 ; RV32-NEXT:    vor.vv v8, v10, v8
 ; RV32-NEXT:    vor.vv v8, v8, v9
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vand.vv v9, v9, v11
-; RV32-NEXT:    vand.vv v8, v8, v11
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vand.vv v8, v8, v10
 ; RV32-NEXT:    vsll.vi v8, v8, 4
 ; RV32-NEXT:    vor.vv v8, v9, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 2
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a1), zero
 ; RV32-NEXT:    vsll.vi v8, v8, 2
 ; RV32-NEXT:    vor.vv v8, v9, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vand.vv v8, v8, v10
 ; RV32-NEXT:    vadd.vv v8, v8, v8
 ; RV32-NEXT:    vor.vv v8, v9, v8
 ; RV32-NEXT:    vse64.v v8, (a0)
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bitreverse_v2i64:
@@ -741,24 +738,12 @@ declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
 define void @bitreverse_v4i64(ptr %x, ptr %y) {
 ; LMULMAX2-RV32-LABEL: bitreverse_v4i64:
 ; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    addi sp, sp, -32
-; LMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 32
+; LMULMAX2-RV32-NEXT:    addi sp, sp, -16
+; LMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 16
 ; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    sw zero, 4(sp)
+; LMULMAX2-RV32-NEXT:    sw zero, 12(sp)
 ; LMULMAX2-RV32-NEXT:    lui a1, 1044480
-; LMULMAX2-RV32-NEXT:    sw a1, 0(sp)
-; LMULMAX2-RV32-NEXT:    lui a1, 61681
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 24(sp)
-; LMULMAX2-RV32-NEXT:    lui a1, 209715
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 16(sp)
-; LMULMAX2-RV32-NEXT:    lui a1, 349525
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    sw a1, 12(sp)
 ; LMULMAX2-RV32-NEXT:    sw a1, 8(sp)
 ; LMULMAX2-RV32-NEXT:    li a1, 56
 ; LMULMAX2-RV32-NEXT:    vsrl.vx v10, v8, a1
@@ -769,7 +754,7 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
 ; LMULMAX2-RV32-NEXT:    vand.vx v12, v12, a3
 ; LMULMAX2-RV32-NEXT:    vor.vv v10, v12, v10
 ; LMULMAX2-RV32-NEXT:    vsrl.vi v12, v8, 24
-; LMULMAX2-RV32-NEXT:    mv a4, sp
+; LMULMAX2-RV32-NEXT:    addi a4, sp, 8
 ; LMULMAX2-RV32-NEXT:    vlse64.v v14, (a4), zero
 ; LMULMAX2-RV32-NEXT:    lui a4, 4080
 ; LMULMAX2-RV32-NEXT:    vand.vx v12, v12, a4
@@ -786,31 +771,40 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
 ; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v14
 ; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 8
 ; LMULMAX2-RV32-NEXT:    vor.vv v8, v16, v8
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32-NEXT:    vlse64.v v14, (a1), zero
 ; LMULMAX2-RV32-NEXT:    vor.vv v8, v12, v8
 ; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vand.vv v10, v10, v14
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v14
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32-NEXT:    vlse64.v v12, (a1), zero
+; LMULMAX2-RV32-NEXT:    lui a1, 61681
+; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v12, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vand.vv v10, v10, v12
+; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v12
 ; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 4
 ; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 2
+; LMULMAX2-RV32-NEXT:    lui a1, 209715
+; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v12, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32-NEXT:    vand.vv v10, v10, v12
 ; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32-NEXT:    vlse64.v v12, (a1), zero
 ; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 2
 ; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-RV32-NEXT:    lui a1, 349525
+; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v12, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32-NEXT:    vand.vv v10, v10, v12
 ; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v12
 ; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v8
 ; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    addi sp, sp, 32
+; LMULMAX2-RV32-NEXT:    addi sp, sp, 16
 ; LMULMAX2-RV32-NEXT:    ret
 ;
 ; LMULMAX2-RV64-LABEL: bitreverse_v4i64:
@@ -877,26 +871,14 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
 ;
 ; LMULMAX1-RV32-LABEL: bitreverse_v4i64:
 ; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi sp, sp, -32
-; LMULMAX1-RV32-NEXT:    .cfi_def_cfa_offset 32
+; LMULMAX1-RV32-NEXT:    addi sp, sp, -16
+; LMULMAX1-RV32-NEXT:    .cfi_def_cfa_offset 16
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    sw zero, 4(sp)
+; LMULMAX1-RV32-NEXT:    sw zero, 12(sp)
 ; LMULMAX1-RV32-NEXT:    lui a2, 1044480
-; LMULMAX1-RV32-NEXT:    sw a2, 0(sp)
-; LMULMAX1-RV32-NEXT:    lui a2, 61681
-; LMULMAX1-RV32-NEXT:    addi a2, a2, -241
-; LMULMAX1-RV32-NEXT:    sw a2, 28(sp)
-; LMULMAX1-RV32-NEXT:    sw a2, 24(sp)
-; LMULMAX1-RV32-NEXT:    lui a2, 209715
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 819
-; LMULMAX1-RV32-NEXT:    sw a2, 20(sp)
-; LMULMAX1-RV32-NEXT:    sw a2, 16(sp)
-; LMULMAX1-RV32-NEXT:    lui a2, 349525
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT:    sw a2, 12(sp)
 ; LMULMAX1-RV32-NEXT:    sw a2, 8(sp)
 ; LMULMAX1-RV32-NEXT:    li a2, 56
 ; LMULMAX1-RV32-NEXT:    vsrl.vx v10, v9, a2
@@ -907,7 +889,7 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
 ; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a4
 ; LMULMAX1-RV32-NEXT:    vor.vv v10, v11, v10
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV32-NEXT:    mv a5, sp
+; LMULMAX1-RV32-NEXT:    addi a5, sp, 8
 ; LMULMAX1-RV32-NEXT:    vlse64.v v12, (a5), zero
 ; LMULMAX1-RV32-NEXT:    lui a5, 4080
 ; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a5
@@ -924,25 +906,34 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
 ; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
 ; LMULMAX1-RV32-NEXT:    vsll.vx v9, v9, a3
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v13, v9
-; LMULMAX1-RV32-NEXT:    addi a6, sp, 24
-; LMULMAX1-RV32-NEXT:    vlse64.v v13, (a6), zero
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v11
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v13
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v13
-; LMULMAX1-RV32-NEXT:    addi a6, sp, 16
-; LMULMAX1-RV32-NEXT:    vlse64.v v11, (a6), zero
+; LMULMAX1-RV32-NEXT:    lui a6, 61681
+; LMULMAX1-RV32-NEXT:    addi a6, a6, -241
+; LMULMAX1-RV32-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a6
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v11
+; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
 ; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 4
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v11
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    addi a6, sp, 8
-; LMULMAX1-RV32-NEXT:    vlse64.v v14, (a6), zero
+; LMULMAX1-RV32-NEXT:    lui a6, 209715
+; LMULMAX1-RV32-NEXT:    addi a6, a6, 819
+; LMULMAX1-RV32-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v13, a6
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v13
+; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v13
 ; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 2
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-RV32-NEXT:    lui a6, 349525
+; LMULMAX1-RV32-NEXT:    addi a6, a6, 1365
+; LMULMAX1-RV32-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v14, a6
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v14
 ; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v14
 ; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v9
@@ -969,13 +960,13 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v15, v8
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v13
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v13
+; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v11
+; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
 ; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 4
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v11
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
+; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v13
+; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v13
 ; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 2
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
@@ -985,7 +976,7 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    addi sp, sp, 32
+; LMULMAX1-RV32-NEXT:    addi sp, sp, 16
 ; LMULMAX1-RV32-NEXT:    ret
 ;
 ; LMULMAX1-RV64-LABEL: bitreverse_v4i64:

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
index e7c459cfc9ff7d..66d9280490613c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
@@ -1449,24 +1449,6 @@ declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
 define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
@@ -1483,15 +1465,17 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -1499,19 +1483,20 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v2i64:
@@ -1568,24 +1553,6 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v9
@@ -1602,15 +1569,17 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v9
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v10, v8, v9
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -1618,19 +1587,20 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v2i64_unmasked:
@@ -1691,24 +1661,6 @@ declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
 define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
@@ -1725,15 +1677,17 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -1741,19 +1695,20 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v4i64:
@@ -1810,24 +1765,6 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v10
@@ -1844,15 +1781,17 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v10
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v12, v8, v10
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -1860,19 +1799,20 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v4i64_unmasked:
@@ -1933,24 +1873,6 @@ declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
 define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
@@ -1967,15 +1889,17 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -1983,19 +1907,20 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v8i64:
@@ -2052,24 +1977,6 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v12
@@ -2086,15 +1993,17 @@ define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v12
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v16, v8, v12
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -2102,19 +2011,20 @@ define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v8i64_unmasked:
@@ -4690,24 +4600,6 @@ define <16 x i32> @vp_ctlz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroex
 define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
@@ -4724,15 +4616,17 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -4740,19 +4634,20 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v2i64:
@@ -4809,24 +4704,6 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
 define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v9
@@ -4843,15 +4720,17 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vor.vv v8, v8, v9
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v10, v8, v9
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -4859,19 +4738,20 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vadd.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v2i64_unmasked:
@@ -4930,24 +4810,6 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
@@ -4964,15 +4826,17 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -4980,19 +4844,20 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v4i64:
@@ -5049,24 +4914,6 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
 define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v10
@@ -5083,15 +4930,17 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vor.vv v8, v8, v10
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v12, v8, v10
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -5099,19 +4948,20 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vadd.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v4i64_unmasked:
@@ -5170,24 +5020,6 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
@@ -5204,15 +5036,17 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -5220,19 +5054,20 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v8i64:
@@ -5289,24 +5124,6 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
 define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v12
@@ -5323,15 +5140,17 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vor.vv v8, v8, v12
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v16, v8, v12
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -5339,19 +5158,20 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v8i64_unmasked:

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 017b8612f7e69a..dd368488338c69 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -460,25 +460,8 @@ declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
 define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: ctlz_v2i64:
 ; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 0(sp)
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 2
@@ -493,29 +476,40 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v9, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v11, v8, 1
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v11, v9
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
+; LMULMAX2-RV32I-NEXT:    lui a1, 349525
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v9, v9, v10
 ; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v8, v10
+; LMULMAX2-RV32I-NEXT:    lui a1, 209715
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v9
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT:    mv a1, sp
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v11, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v11
 ; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT:    lui a1, 61681
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT:    lui a1, 4112
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    li a1, 56
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT:    ret
 ;
 ; LMULMAX2-RV64I-LABEL: ctlz_v2i64:
@@ -1160,25 +1154,8 @@ declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
 define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: ctlz_v4i64:
 ; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 0(sp)
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 2
@@ -1193,29 +1170,40 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v10, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT:    vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v14, v8, 1
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v14, v10
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT:    lui a1, 349525
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v10, v10, v12
 ; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v12
+; LMULMAX2-RV32I-NEXT:    lui a1, 209715
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v12, v8, v10
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v12
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    mv a1, sp
-; LMULMAX2-RV32I-NEXT:    vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v14, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v14
 ; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    lui a1, 61681
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    lui a1, 4112
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    li a1, 56
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT:    ret
 ;
 ; LMULMAX2-RV64I-LABEL: ctlz_v4i64:
@@ -1788,25 +1776,8 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
 define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v2i64:
 ; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 0(sp)
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 2
@@ -1821,29 +1792,40 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v9, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v11, v8, 1
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v11, v9
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
+; LMULMAX2-RV32I-NEXT:    lui a1, 349525
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v9, v9, v10
 ; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v8, v10
+; LMULMAX2-RV32I-NEXT:    lui a1, 209715
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v9
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT:    mv a1, sp
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v11, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v11
 ; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT:    lui a1, 61681
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT:    lui a1, 4112
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    li a1, 56
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT:    ret
 ;
 ; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v2i64:
@@ -2458,25 +2440,8 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
 define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v4i64:
 ; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 0(sp)
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 2
@@ -2491,29 +2456,40 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v10, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT:    vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v14, v8, 1
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v14, v10
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT:    lui a1, 349525
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v10, v10, v12
 ; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v12
+; LMULMAX2-RV32I-NEXT:    lui a1, 209715
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v12, v8, v10
 ; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v12
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    mv a1, sp
-; LMULMAX2-RV32I-NEXT:    vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v14, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v14
 ; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    lui a1, 61681
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    lui a1, 4112
+; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    li a1, 56
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT:    ret
 ;
 ; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v4i64:

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
index 10c576c1251d23..a7de50c8dc5d0e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
@@ -1073,35 +1073,19 @@ declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
 define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -1109,19 +1093,20 @@ define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v2i64:
@@ -1164,35 +1149,19 @@ define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v10, v8, v9
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -1200,19 +1169,20 @@ define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v2i64_unmasked:
@@ -1259,35 +1229,19 @@ declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)
 define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -1295,19 +1249,20 @@ define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v4i64:
@@ -1350,35 +1305,19 @@ define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v12, v8, v10
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -1386,19 +1325,20 @@ define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v4i64_unmasked:
@@ -1445,35 +1385,19 @@ declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)
 define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -1481,19 +1405,20 @@ define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v8i64:
@@ -1536,35 +1461,19 @@ define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v16, v8, v12
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -1572,19 +1481,20 @@ define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v8i64_unmasked:

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index f027de4f1342bc..2b2d9c0a901e8f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -288,49 +288,42 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
 define void @ctpop_v2i64(ptr %x, ptr %y) {
 ; LMULMAX2-RV32-LABEL: ctpop_v2i64:
 ; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    addi sp, sp, -32
-; LMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 32
 ; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
 ; LMULMAX2-RV32-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 24(sp)
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-RV32-NEXT:    vand.vv v9, v10, v9
+; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV32-NEXT:    lui a1, 209715
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 16(sp)
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT:    vand.vv v10, v8, v9
+; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v9
+; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV32-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    sw a1, 12(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 8(sp)
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v9
 ; LMULMAX2-RV32-NEXT:    lui a1, 4112
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32-NEXT:    sw a1, 4(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 0(sp)
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT:    vsrl.vi v11, v8, 1
-; LMULMAX2-RV32-NEXT:    vand.vv v9, v11, v9
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vand.vv v9, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX2-RV32-NEXT:    mv a1, sp
-; LMULMAX2-RV32-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT:    vsrl.vi v11, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vmul.vv v8, v8, v10
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT:    vmul.vv v8, v8, v9
 ; LMULMAX2-RV32-NEXT:    li a1, 56
 ; LMULMAX2-RV32-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    addi sp, sp, 32
 ; LMULMAX2-RV32-NEXT:    ret
 ;
 ; LMULMAX2-RV64-LABEL: ctpop_v2i64:
@@ -371,49 +364,42 @@ define void @ctpop_v2i64(ptr %x, ptr %y) {
 ;
 ; LMULMAX1-RV32-LABEL: ctpop_v2i64:
 ; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi sp, sp, -32
-; LMULMAX1-RV32-NEXT:    .cfi_def_cfa_offset 32
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    lui a1, 349525
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT:    sw a1, 28(sp)
-; LMULMAX1-RV32-NEXT:    sw a1, 24(sp)
+; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-RV32-NEXT:    vand.vv v9, v10, v9
+; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX1-RV32-NEXT:    lui a1, 209715
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX1-RV32-NEXT:    sw a1, 20(sp)
-; LMULMAX1-RV32-NEXT:    sw a1, 16(sp)
+; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vand.vv v10, v8, v9
+; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
+; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX1-RV32-NEXT:    lui a1, 61681
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX1-RV32-NEXT:    sw a1, 12(sp)
-; LMULMAX1-RV32-NEXT:    sw a1, 8(sp)
+; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
 ; LMULMAX1-RV32-NEXT:    lui a1, 4112
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 257
-; LMULMAX1-RV32-NEXT:    sw a1, 4(sp)
-; LMULMAX1-RV32-NEXT:    sw a1, 0(sp)
-; LMULMAX1-RV32-NEXT:    addi a1, sp, 24
-; LMULMAX1-RV32-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX1-RV32-NEXT:    addi a1, sp, 16
-; LMULMAX1-RV32-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v8, 1
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v11, v9
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT:    addi a1, sp, 8
-; LMULMAX1-RV32-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX1-RV32-NEXT:    mv a1, sp
-; LMULMAX1-RV32-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v10
+; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v9
 ; LMULMAX1-RV32-NEXT:    li a1, 56
 ; LMULMAX1-RV32-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi sp, sp, 32
 ; LMULMAX1-RV32-NEXT:    ret
 ;
 ; LMULMAX1-RV64-LABEL: ctpop_v2i64:
@@ -843,49 +829,42 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
 define void @ctpop_v4i64(ptr %x, ptr %y) {
 ; LMULMAX2-RV32-LABEL: ctpop_v4i64:
 ; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    addi sp, sp, -32
-; LMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 32
 ; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
 ; LMULMAX2-RV32-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 24(sp)
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vsrl.vi v12, v8, 1
+; LMULMAX2-RV32-NEXT:    vand.vv v10, v12, v10
+; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
 ; LMULMAX2-RV32-NEXT:    lui a1, 209715
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 16(sp)
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vand.vv v12, v8, v10
+; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
+; LMULMAX2-RV32-NEXT:    vadd.vv v8, v12, v8
+; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX2-RV32-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    sw a1, 12(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 8(sp)
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
 ; LMULMAX2-RV32-NEXT:    lui a1, 4112
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32-NEXT:    sw a1, 4(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 0(sp)
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32-NEXT:    vlse64.v v12, (a1), zero
-; LMULMAX2-RV32-NEXT:    vsrl.vi v14, v8, 1
-; LMULMAX2-RV32-NEXT:    vand.vv v10, v14, v10
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vand.vv v10, v8, v12
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT:    mv a1, sp
-; LMULMAX2-RV32-NEXT:    vlse64.v v12, (a1), zero
-; LMULMAX2-RV32-NEXT:    vsrl.vi v14, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v14
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vmul.vv v8, v8, v12
+; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT:    vmul.vv v8, v8, v10
 ; LMULMAX2-RV32-NEXT:    li a1, 56
 ; LMULMAX2-RV32-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    addi sp, sp, 32
 ; LMULMAX2-RV32-NEXT:    ret
 ;
 ; LMULMAX2-RV64-LABEL: ctpop_v4i64:
@@ -926,46 +905,40 @@ define void @ctpop_v4i64(ptr %x, ptr %y) {
 ;
 ; LMULMAX1-RV32-LABEL: ctpop_v4i64:
 ; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi sp, sp, -32
-; LMULMAX1-RV32-NEXT:    .cfi_def_cfa_offset 32
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
 ; LMULMAX1-RV32-NEXT:    lui a2, 349525
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT:    sw a2, 28(sp)
-; LMULMAX1-RV32-NEXT:    sw a2, 24(sp)
+; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a2
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 1
+; LMULMAX1-RV32-NEXT:    vand.vv v11, v11, v10
+; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v11
 ; LMULMAX1-RV32-NEXT:    lui a2, 209715
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, 819
-; LMULMAX1-RV32-NEXT:    sw a2, 20(sp)
-; LMULMAX1-RV32-NEXT:    sw a2, 16(sp)
-; LMULMAX1-RV32-NEXT:    lui a2, 61681
-; LMULMAX1-RV32-NEXT:    addi a2, a2, -241
-; LMULMAX1-RV32-NEXT:    sw a2, 12(sp)
-; LMULMAX1-RV32-NEXT:    sw a2, 8(sp)
-; LMULMAX1-RV32-NEXT:    lui a2, 4112
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 257
-; LMULMAX1-RV32-NEXT:    sw a2, 4(sp)
-; LMULMAX1-RV32-NEXT:    sw a2, 0(sp)
-; LMULMAX1-RV32-NEXT:    addi a2, sp, 24
-; LMULMAX1-RV32-NEXT:    vlse64.v v10, (a2), zero
-; LMULMAX1-RV32-NEXT:    addi a2, sp, 16
-; LMULMAX1-RV32-NEXT:    vlse64.v v11, (a2), zero
-; LMULMAX1-RV32-NEXT:    vsrl.vi v12, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vv v12, v12, v10
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v12
+; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a2
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT:    vand.vv v12, v9, v11
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
 ; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
 ; LMULMAX1-RV32-NEXT:    vadd.vv v9, v12, v9
-; LMULMAX1-RV32-NEXT:    addi a2, sp, 8
-; LMULMAX1-RV32-NEXT:    vlse64.v v12, (a2), zero
-; LMULMAX1-RV32-NEXT:    mv a2, sp
-; LMULMAX1-RV32-NEXT:    vlse64.v v13, (a2), zero
-; LMULMAX1-RV32-NEXT:    vsrl.vi v14, v9, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v14
+; LMULMAX1-RV32-NEXT:    vsrl.vi v12, v9, 4
+; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v12
+; LMULMAX1-RV32-NEXT:    lui a2, 61681
+; LMULMAX1-RV32-NEXT:    addi a2, a2, -241
+; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v12, a2
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v12
+; LMULMAX1-RV32-NEXT:    lui a2, 4112
+; LMULMAX1-RV32-NEXT:    addi a2, a2, 257
+; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.x v13, a2
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v13
 ; LMULMAX1-RV32-NEXT:    li a2, 56
 ; LMULMAX1-RV32-NEXT:    vsrl.vx v9, v9, a2
@@ -983,7 +956,6 @@ define void @ctpop_v4i64(ptr %x, ptr %y) {
 ; LMULMAX1-RV32-NEXT:    vsrl.vx v8, v8, a2
 ; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    addi sp, sp, 32
 ; LMULMAX1-RV32-NEXT:    ret
 ;
 ; LMULMAX1-RV64-LABEL: ctpop_v4i64:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index 95141df2cc5a14..0a94bbfb27c167 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -1233,39 +1233,23 @@ declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
 define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -1273,19 +1257,20 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v2i64:
@@ -1332,39 +1317,23 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsub.vx v9, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v9
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v10, v8, v9
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -1372,19 +1341,20 @@ define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v2i64_unmasked:
@@ -1435,39 +1405,23 @@ declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
 define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -1475,19 +1429,20 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v4i64:
@@ -1534,39 +1489,23 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsub.vx v10, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v10
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v12, v8, v10
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -1574,19 +1513,20 @@ define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v4i64_unmasked:
@@ -1637,39 +1577,23 @@ declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
 define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -1677,19 +1601,20 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v8i64:
@@ -1736,39 +1661,23 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsub.vx v12, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v12
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v16, v8, v12
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -1776,19 +1685,20 @@ define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v8i64_unmasked:
@@ -3978,39 +3888,23 @@ define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroex
 define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -4018,19 +3912,20 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v2i64:
@@ -4077,39 +3972,23 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
 define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsub.vx v9, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v9
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v10, v8, v9
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -4117,19 +3996,20 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vadd.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
@@ -4178,39 +4058,23 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -4218,19 +4082,20 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v4i64:
@@ -4277,39 +4142,23 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
 define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsub.vx v10, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v10
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v12, v8, v10
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -4317,19 +4166,20 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vadd.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
@@ -4378,39 +4228,23 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -4418,19 +4252,20 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v8i64:
@@ -4477,39 +4312,23 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
 define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsub.vx v12, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v12
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v16, v8, v12
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
@@ -4517,19 +4336,20 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v8i64_unmasked:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 9cdcf59ba914d9..5165e716c73755 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -456,52 +456,46 @@ declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
 define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: cttz_v2i64:
 ; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT:    li a1, 1
+; LMULMAX2-RV32I-NEXT:    vsub.vx v9, v8, a1
+; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV32I-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 24(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v9, v9, v10
+; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    lui a1, 209715
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 16(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v9
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 8(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    lui a1, 4112
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 0(sp)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v11, v8, 1
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v11, v9
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT:    mv a1, sp
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v11, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    li a1, 56
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT:    ret
 ;
 ; LMULMAX2-RV64I-LABEL: cttz_v2i64:
@@ -1115,52 +1109,46 @@ declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
 define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: cttz_v4i64:
 ; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT:    li a1, 1
+; LMULMAX2-RV32I-NEXT:    vsub.vx v10, v8, a1
+; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX2-RV32I-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 24(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v10, v10, v12
+; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    lui a1, 209715
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 16(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v12, v8, v10
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 8(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    lui a1, 4112
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 0(sp)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT:    vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v14, v8, 1
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v14, v10
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v12
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v12
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    mv a1, sp
-; LMULMAX2-RV32I-NEXT:    vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v14, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v14
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    li a1, 56
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT:    ret
 ;
 ; LMULMAX2-RV64I-LABEL: cttz_v4i64:
@@ -1723,52 +1711,46 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
 define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v2i64:
 ; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT:    li a1, 1
+; LMULMAX2-RV32I-NEXT:    vsub.vx v9, v8, a1
+; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV32I-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 24(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v9, v9, v10
+; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    lui a1, 209715
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 16(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v9
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 8(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    lui a1, 4112
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 0(sp)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v11, v8, 1
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v11, v9
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT:    vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT:    mv a1, sp
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v11, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT:    li a1, 56
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT:    ret
 ;
 ; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v2i64:
@@ -2342,52 +2324,46 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
 define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v4i64:
 ; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT:    li a1, 1
+; LMULMAX2-RV32I-NEXT:    vsub.vx v10, v8, a1
+; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX2-RV32I-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 24(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v10, v10, v12
+; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    lui a1, 209715
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 16(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v12, v8, v10
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 8(sp)
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    lui a1, 4112
 ; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT:    sw a1, 0(sp)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT:    vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v14, v8, 1
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v14, v10
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v12
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v12
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT:    mv a1, sp
-; LMULMAX2-RV32I-NEXT:    vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v14, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v14
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT:    li a1, 56
 ; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT:    ret
 ;
 ; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v4i64:

