[llvm] 7b5cf52 - [RISCV] Improve splatPartsI64WithVL for fixed vector constants where Hi and Lo are the same and the VL is constant.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 29 09:33:31 PDT 2023
Author: Craig Topper
Date: 2023-08-29T09:27:48-07:00
New Revision: 7b5cf52f32c09ff68b7f0ae5775dfbafc51a7907
URL: https://github.com/llvm/llvm-project/commit/7b5cf52f32c09ff68b7f0ae5775dfbafc51a7907
DIFF: https://github.com/llvm/llvm-project/commit/7b5cf52f32c09ff68b7f0ae5775dfbafc51a7907.diff
LOG: [RISCV] Improve splatPartsI64WithVL for fixed vector constants where Hi and Lo are the same and the VL is constant.
If the doubled VL fits in a vsetivli immediate, use it. It will be cheap
to change the VL and cheap to change it back.
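For context, vsetivli encodes its AVL as a 5-bit immediate (0 to 31), so an
i64 element count of at most 15 (the isUInt<4> check in the patch below) can
be doubled into an e32 element count of at most 30 and still be set by a
single vsetivli. A minimal standalone sketch of that arithmetic, using a
hypothetical helper name rather than anything in the patch:

  #include <cstdint>
  #include <optional>

  // eew32SplatVL is illustrative only; the in-tree logic lives in
  // splatPartsI64WithVL. Given the element count of a fixed i64 splat
  // whose Lo and Hi halves match, return the EEW=32 element count to
  // splat instead, or nullopt if the doubled VL would no longer fit in
  // vsetivli's 5-bit immediate.
  std::optional<uint64_t> eew32SplatVL(uint64_t VL) {
    if (VL <= 15)        // isUInt<4>(VL): 2 * 15 == 30 still fits in uimm5
      return 2 * VL;     // twice as many e32 elements cover the same bytes
    return std::nullopt; // fall back to the stride-0 load from the stack
  }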
This improves codegen from D158896.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D158896
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index bea3f4d6313b04..6a21aca0700f2b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3559,18 +3559,28 @@ static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
if ((LoC >> 31) == HiC)
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
- // If vl is equal to VLMAX and Hi constant is equal to Lo, we could use
- // vmv.v.x whose EEW = 32 to lower it.
- if (LoC == HiC && (isAllOnesConstant(VL) ||
- (isa<RegisterSDNode>(VL) &&
- cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))) {
- MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
- // TODO: if vl <= min(VLMAX), we can also do this. But we could not
- // access the subtarget here now.
- auto InterVec = DAG.getNode(
- RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
- DAG.getRegister(RISCV::X0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+ // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
+ // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
+ // vlmax vsetvli or vsetivli to change the VL.
+ // FIXME: Support larger constants?
+ // FIXME: Support non-constant VLs by saturating?
+ if (LoC == HiC) {
+ SDValue NewVL;
+ if (isAllOnesConstant(VL) ||
+ (isa<RegisterSDNode>(VL) &&
+ cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
+ NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
+ else if (isa<ConstantSDNode>(VL) &&
+ isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
+ NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
+
+ if (NewVL) {
+ MVT InterVT =
+ MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
+ auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
+                                    DAG.getUNDEF(InterVT), Lo, NewVL);
+ return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+ }
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
index 2a92f30b851714..64304189ac8b51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -1385,22 +1385,10 @@ declare <2 x i64> @llvm.vp.bitreverse.v2i64(<2 x i64>, <2 x i1>, i32)
define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
@@ -1414,7 +1402,7 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v10, v8, a4, v0.t
; RV32-NEXT: vsll.vi v10, v10, 24, v0.t
-; RV32-NEXT: mv a5, sp
+; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v11, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
@@ -1434,33 +1422,36 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
; RV32-NEXT: vor.vv v8, v9, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v9, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v9, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v2i64:
@@ -1529,22 +1520,10 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e
define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v2i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
@@ -1558,7 +1537,7 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl)
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v10, v8, a4
; RV32-NEXT: vsll.vi v10, v10, 24
-; RV32-NEXT: mv a5, sp
+; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v11, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
@@ -1578,33 +1557,36 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl)
; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v2i64_unmasked:
@@ -1677,22 +1659,10 @@ declare <4 x i64> @llvm.vp.bitreverse.v4i64(<4 x i64>, <4 x i1>, i32)
define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
@@ -1706,7 +1676,7 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v12, v8, a4, v0.t
; RV32-NEXT: vsll.vi v12, v12, 24, v0.t
-; RV32-NEXT: mv a5, sp
+; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v14, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
@@ -1726,33 +1696,36 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
; RV32-NEXT: vor.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v4i64:
@@ -1821,22 +1794,10 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e
define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v4i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
@@ -1850,7 +1811,7 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl)
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v12, v8, a4
; RV32-NEXT: vsll.vi v12, v12, 24
-; RV32-NEXT: mv a5, sp
+; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v14, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
@@ -1870,33 +1831,36 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl)
; RV32-NEXT: vor.vv v8, v8, v12
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v4i64_unmasked:
@@ -1969,22 +1933,10 @@ declare <8 x i64> @llvm.vp.bitreverse.v8i64(<8 x i64>, <8 x i1>, i32)
define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
@@ -1998,7 +1950,7 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v12, v8, a4, v0.t
; RV32-NEXT: vsll.vi v20, v12, 24, v0.t
-; RV32-NEXT: mv a5, sp
+; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
@@ -2018,33 +1970,36 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e
; RV32-NEXT: vor.vv v8, v8, v20, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v12, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v12, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v12, v8, v0.t
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v8i64:
@@ -2113,22 +2068,10 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e
define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v8i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
@@ -2142,7 +2085,7 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl)
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4
; RV32-NEXT: vsll.vi v16, v16, 24
-; RV32-NEXT: mv a5, sp
+; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v20, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
@@ -2162,33 +2105,36 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl)
; RV32-NEXT: vor.vv v8, v8, v16
; RV32-NEXT: vor.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v8i64_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
index 92ca238e08763a..2daa0e1bcefedc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -181,24 +181,12 @@ declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
define void @bitreverse_v2i64(ptr %x, ptr %y) {
; RV32-LABEL: bitreverse_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsrl.vx v9, v8, a1
@@ -209,7 +197,7 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) {
; RV32-NEXT: vand.vx v10, v10, a3
; RV32-NEXT: vor.vv v9, v10, v9
; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: mv a4, sp
+; RV32-NEXT: addi a4, sp, 8
; RV32-NEXT: vlse64.v v11, (a4), zero
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v10, v10, a4
@@ -226,31 +214,40 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) {
; RV32-NEXT: vand.vv v8, v8, v11
; RV32-NEXT: vsll.vi v8, v8, 8
; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vlse64.v v11, (a1), zero
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vor.vv v8, v8, v9
; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vand.vv v9, v9, v11
-; RV32-NEXT: vand.vv v8, v8, v11
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vand.vv v9, v9, v10
+; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vsrl.vi v9, v8, 2
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vlse64.v v10, (a1), zero
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vsrl.vi v9, v8, 1
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_v2i64:
@@ -741,24 +738,12 @@ declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
define void @bitreverse_v4i64(ptr %x, ptr %y) {
; LMULMAX2-RV32-LABEL: bitreverse_v4i64:
; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: addi sp, sp, -32
-; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 32
+; LMULMAX2-RV32-NEXT: addi sp, sp, -16
+; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT: sw zero, 4(sp)
+; LMULMAX2-RV32-NEXT: sw zero, 12(sp)
; LMULMAX2-RV32-NEXT: lui a1, 1044480
-; LMULMAX2-RV32-NEXT: sw a1, 0(sp)
-; LMULMAX2-RV32-NEXT: lui a1, 61681
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 24(sp)
-; LMULMAX2-RV32-NEXT: lui a1, 209715
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 16(sp)
-; LMULMAX2-RV32-NEXT: lui a1, 349525
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: sw a1, 12(sp)
; LMULMAX2-RV32-NEXT: sw a1, 8(sp)
; LMULMAX2-RV32-NEXT: li a1, 56
; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a1
@@ -769,7 +754,7 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3
; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10
; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24
-; LMULMAX2-RV32-NEXT: mv a4, sp
+; LMULMAX2-RV32-NEXT: addi a4, sp, 8
; LMULMAX2-RV32-NEXT: vlse64.v v14, (a4), zero
; LMULMAX2-RV32-NEXT: lui a4, 4080
; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4
@@ -786,31 +771,40 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14
; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8
; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8
-; LMULMAX2-RV32-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32-NEXT: vlse64.v v14, (a1), zero
; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8
; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v14
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14
-; LMULMAX2-RV32-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32-NEXT: vlse64.v v12, (a1), zero
+; LMULMAX2-RV32-NEXT: lui a1, 61681
+; LMULMAX2-RV32-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12
+; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12
; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 4
; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8
; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2
+; LMULMAX2-RV32-NEXT: lui a1, 209715
+; LMULMAX2-RV32-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12
; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32-NEXT: vlse64.v v12, (a1), zero
; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 2
; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8
; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32-NEXT: lui a1, 349525
+; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12
; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12
; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v8
; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8
; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT: addi sp, sp, 32
+; LMULMAX2-RV32-NEXT: addi sp, sp, 16
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX2-RV64-LABEL: bitreverse_v4i64:
@@ -877,26 +871,14 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
;
; LMULMAX1-RV32-LABEL: bitreverse_v4i64:
; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: addi sp, sp, -32
-; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32
+; LMULMAX1-RV32-NEXT: addi sp, sp, -16
+; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
+; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
; LMULMAX1-RV32-NEXT: lui a2, 1044480
-; LMULMAX1-RV32-NEXT: sw a2, 0(sp)
-; LMULMAX1-RV32-NEXT: lui a2, 61681
-; LMULMAX1-RV32-NEXT: addi a2, a2, -241
-; LMULMAX1-RV32-NEXT: sw a2, 28(sp)
-; LMULMAX1-RV32-NEXT: sw a2, 24(sp)
-; LMULMAX1-RV32-NEXT: lui a2, 209715
-; LMULMAX1-RV32-NEXT: addi a2, a2, 819
-; LMULMAX1-RV32-NEXT: sw a2, 20(sp)
-; LMULMAX1-RV32-NEXT: sw a2, 16(sp)
-; LMULMAX1-RV32-NEXT: lui a2, 349525
-; LMULMAX1-RV32-NEXT: addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT: sw a2, 12(sp)
; LMULMAX1-RV32-NEXT: sw a2, 8(sp)
; LMULMAX1-RV32-NEXT: li a2, 56
; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2
@@ -907,7 +889,7 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4
; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10
; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24
-; LMULMAX1-RV32-NEXT: mv a5, sp
+; LMULMAX1-RV32-NEXT: addi a5, sp, 8
; LMULMAX1-RV32-NEXT: vlse64.v v12, (a5), zero
; LMULMAX1-RV32-NEXT: lui a5, 4080
; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5
@@ -924,25 +906,34 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a3
; LMULMAX1-RV32-NEXT: vor.vv v9, v13, v9
-; LMULMAX1-RV32-NEXT: addi a6, sp, 24
-; LMULMAX1-RV32-NEXT: vlse64.v v13, (a6), zero
; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11
; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13
-; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v13
-; LMULMAX1-RV32-NEXT: addi a6, sp, 16
-; LMULMAX1-RV32-NEXT: vlse64.v v11, (a6), zero
+; LMULMAX1-RV32-NEXT: lui a6, 61681
+; LMULMAX1-RV32-NEXT: addi a6, a6, -241
+; LMULMAX1-RV32-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v11, a6
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11
+; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11
; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 4
; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9
; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11
-; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT: addi a6, sp, 8
-; LMULMAX1-RV32-NEXT: vlse64.v v14, (a6), zero
+; LMULMAX1-RV32-NEXT: lui a6, 209715
+; LMULMAX1-RV32-NEXT: addi a6, a6, 819
+; LMULMAX1-RV32-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v13, a6
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13
+; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v13
; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 2
; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9
; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
+; LMULMAX1-RV32-NEXT: lui a6, 349525
+; LMULMAX1-RV32-NEXT: addi a6, a6, 1365
+; LMULMAX1-RV32-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v14, a6
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v14
; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v14
; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v9
@@ -969,13 +960,13 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
; LMULMAX1-RV32-NEXT: vor.vv v8, v15, v8
; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v13
+; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11
+; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4
; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8
; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11
+; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13
+; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v13
; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2
; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8
; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
@@ -985,7 +976,7 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT: addi sp, sp, 32
+; LMULMAX1-RV32-NEXT: addi sp, sp, 16
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: bitreverse_v4i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
index e7c459cfc9ff7d..66d9280490613c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
@@ -1449,24 +1449,6 @@ declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v9, v0.t
@@ -1483,15 +1465,17 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v9, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v10, v8, v9, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1499,19 +1483,20 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v2i64:
@@ -1568,24 +1553,6 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v2i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsrl.vi v9, v8, 1
; RV32-NEXT: vor.vv v8, v8, v9
@@ -1602,15 +1569,17 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v9
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v10, v8, v9
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1618,19 +1587,20 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v9, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v2i64_unmasked:
@@ -1691,24 +1661,6 @@ declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
@@ -1725,15 +1677,17 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v12, v8, v10, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1741,19 +1695,20 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v4i64:
@@ -1810,24 +1765,6 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v4i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsrl.vi v10, v8, 1
; RV32-NEXT: vor.vv v8, v8, v10
@@ -1844,15 +1781,17 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v12, v8, v10
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1860,19 +1799,20 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v10, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v4i64_unmasked:
@@ -1933,24 +1873,6 @@ declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
@@ -1967,15 +1889,17 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v16, v8, v12, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1983,19 +1907,20 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v8i64:
@@ -2052,24 +1977,6 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v8i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsrl.vi v12, v8, 1
; RV32-NEXT: vor.vv v8, v8, v12
@@ -2086,15 +1993,17 @@ define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v12
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v16, v8, v12
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -2102,19 +2011,20 @@ define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v12, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v8i64_unmasked:
@@ -4690,24 +4600,6 @@ define <16 x i32> @vp_ctlz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroex
define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v9, v0.t
@@ -4724,15 +4616,17 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
; RV32-NEXT: vor.vv v8, v8, v9, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v10, v8, v9, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -4740,19 +4634,20 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v2i64:
@@ -4809,24 +4704,6 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v2i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsrl.vi v9, v8, 1
; RV32-NEXT: vor.vv v8, v8, v9
@@ -4843,15 +4720,17 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
; RV32-NEXT: vor.vv v8, v8, v9
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v10, v8, v9
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -4859,19 +4738,20 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
; RV32-NEXT: vadd.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v9, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v2i64_unmasked:
@@ -4930,24 +4810,6 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
@@ -4964,15 +4826,17 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v12, v8, v10, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -4980,19 +4844,20 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v4i64:
@@ -5049,24 +4914,6 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v4i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsrl.vi v10, v8, 1
; RV32-NEXT: vor.vv v8, v8, v10
@@ -5083,15 +4930,17 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v12, v8, v10
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -5099,19 +4948,20 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
; RV32-NEXT: vadd.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v10, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v4i64_unmasked:
@@ -5170,24 +5020,6 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
@@ -5204,15 +5036,17 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v16, v8, v12, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -5220,19 +5054,20 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v8i64:
@@ -5289,24 +5124,6 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v8i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsrl.vi v12, v8, 1
; RV32-NEXT: vor.vv v8, v8, v12
@@ -5323,15 +5140,17 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
; RV32-NEXT: vor.vv v8, v8, v12
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v16, v8, v12
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -5339,19 +5158,20 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
; RV32-NEXT: vadd.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v12, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v8i64_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 017b8612f7e69a..dd368488338c69 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -460,25 +460,8 @@ declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-LABEL: ctlz_v2i64:
; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: lui a1, 349525
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 209715
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 61681
-; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 4112
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2
@@ -493,29 +476,40 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-NEXT: vsrl.vx v9, v8, a1
; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v11, v9
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10
; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9
; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v11
; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: li a1, 56
; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
; LMULMAX2-RV32I-NEXT: ret
;
; LMULMAX2-RV64I-LABEL: ctlz_v2i64:
@@ -1160,25 +1154,8 @@ declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-LABEL: ctlz_v4i64:
; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: lui a1, 349525
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 209715
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 61681
-; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 4112
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2
@@ -1193,29 +1170,40 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-NEXT: vsrl.vx v10, v8, a1
; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v14, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12
; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v12
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10
; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v12
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v14
; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: li a1, 56
; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
; LMULMAX2-RV32I-NEXT: ret
;
; LMULMAX2-RV64I-LABEL: ctlz_v4i64:
@@ -1788,25 +1776,8 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v2i64:
; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: lui a1, 349525
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 209715
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 61681
-; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 4112
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2
@@ -1821,29 +1792,40 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-NEXT: vsrl.vx v9, v8, a1
; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v11, v9
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10
; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9
; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v11
; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: li a1, 56
; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
; LMULMAX2-RV32I-NEXT: ret
;
; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v2i64:
@@ -2458,25 +2440,8 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v4i64:
; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: lui a1, 349525
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 209715
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 61681
-; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 4112
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2
@@ -2491,29 +2456,40 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-NEXT: vsrl.vx v10, v8, a1
; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v14, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12
; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v12
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10
; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v12
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v14
; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: li a1, 56
; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
; LMULMAX2-RV32I-NEXT: ret
;
; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v4i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
index 10c576c1251d23..a7de50c8dc5d0e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
@@ -1073,35 +1073,19 @@ declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v10, v8, v9, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1109,19 +1093,20 @@ define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v2i64:
@@ -1164,35 +1149,19 @@ define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v2i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v10, v8, v9
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1200,19 +1169,20 @@ define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v9, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v2i64_unmasked:
@@ -1259,35 +1229,19 @@ declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)
define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v12, v8, v10, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1295,19 +1249,20 @@ define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v4i64:
@@ -1350,35 +1305,19 @@ define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v4i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v12, v8, v10
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1386,19 +1325,20 @@ define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v10, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v4i64_unmasked:
@@ -1445,35 +1385,19 @@ declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)
define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v16, v8, v12, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1481,19 +1405,20 @@ define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v8i64:
@@ -1536,35 +1461,19 @@ define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v8i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v16, v8, v12
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1572,19 +1481,20 @@ define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v12, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v8i64_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index f027de4f1342bc..2b2d9c0a901e8f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -288,49 +288,42 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
define void @ctpop_v2i64(ptr %x, ptr %y) {
; LMULMAX2-RV32-LABEL: ctpop_v2i64:
; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: addi sp, sp, -32
-; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 32
; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX2-RV32-NEXT: lui a1, 349525
; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 24(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32-NEXT: vand.vv v9, v10, v9
+; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV32-NEXT: lui a1, 209715
; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 16(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v9
+; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 8(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 0(sp)
-; LMULMAX2-RV32-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT: vsrl.vi v11, v8, 1
-; LMULMAX2-RV32-NEXT: vand.vv v9, v11, v9
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: vand.vv v9, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV32-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32-NEXT: mv a1, sp
-; LMULMAX2-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT: vsrl.vi v11, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v11
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v10
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v9
; LMULMAX2-RV32-NEXT: li a1, 56
; LMULMAX2-RV32-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT: addi sp, sp, 32
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX2-RV64-LABEL: ctpop_v2i64:
@@ -371,49 +364,42 @@ define void @ctpop_v2i64(ptr %x, ptr %y) {
;
; LMULMAX1-RV32-LABEL: ctpop_v2i64:
; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: addi sp, sp, -32
-; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: lui a1, 349525
; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: sw a1, 28(sp)
-; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
+; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX1-RV32-NEXT: vand.vv v9, v10, v9
+; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
; LMULMAX1-RV32-NEXT: lui a1, 209715
; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: sw a1, 20(sp)
-; LMULMAX1-RV32-NEXT: sw a1, 16(sp)
+; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v9
+; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
+; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
+; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
; LMULMAX1-RV32-NEXT: lui a1, 61681
; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: sw a1, 12(sp)
-; LMULMAX1-RV32-NEXT: sw a1, 8(sp)
+; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
; LMULMAX1-RV32-NEXT: lui a1, 4112
; LMULMAX1-RV32-NEXT: addi a1, a1, 257
-; LMULMAX1-RV32-NEXT: sw a1, 4(sp)
-; LMULMAX1-RV32-NEXT: sw a1, 0(sp)
-; LMULMAX1-RV32-NEXT: addi a1, sp, 24
-; LMULMAX1-RV32-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX1-RV32-NEXT: addi a1, sp, 16
-; LMULMAX1-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 1
-; LMULMAX1-RV32-NEXT: vand.vv v9, v11, v9
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vand.vv v9, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: addi a1, sp, 8
-; LMULMAX1-RV32-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX1-RV32-NEXT: mv a1, sp
-; LMULMAX1-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v10
+; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v9
; LMULMAX1-RV32-NEXT: li a1, 56
; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a1
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT: addi sp, sp, 32
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: ctpop_v2i64:
@@ -843,49 +829,42 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
define void @ctpop_v4i64(ptr %x, ptr %y) {
; LMULMAX2-RV32-LABEL: ctpop_v4i64:
; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: addi sp, sp, -32
-; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 32
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX2-RV32-NEXT: lui a1, 349525
; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 24(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 1
+; LMULMAX2-RV32-NEXT: vand.vv v10, v12, v10
+; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: lui a1, 209715
; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 16(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vand.vv v12, v8, v10
+; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32-NEXT: vadd.vv v8, v12, v8
+; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 8(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 0(sp)
-; LMULMAX2-RV32-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 1
-; LMULMAX2-RV32-NEXT: vand.vv v10, v14, v10
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v12
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT: mv a1, sp
-; LMULMAX2-RV32-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v14
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v12
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: li a1, 56
; LMULMAX2-RV32-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT: addi sp, sp, 32
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX2-RV64-LABEL: ctpop_v4i64:
@@ -926,46 +905,40 @@ define void @ctpop_v4i64(ptr %x, ptr %y) {
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64:
; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: addi sp, sp, -32
-; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a1)
; LMULMAX1-RV32-NEXT: lui a2, 349525
; LMULMAX1-RV32-NEXT: addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT: sw a2, 28(sp)
-; LMULMAX1-RV32-NEXT: sw a2, 24(sp)
+; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 1
+; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10
+; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v11
; LMULMAX1-RV32-NEXT: lui a2, 209715
; LMULMAX1-RV32-NEXT: addi a2, a2, 819
-; LMULMAX1-RV32-NEXT: sw a2, 20(sp)
-; LMULMAX1-RV32-NEXT: sw a2, 16(sp)
-; LMULMAX1-RV32-NEXT: lui a2, 61681
-; LMULMAX1-RV32-NEXT: addi a2, a2, -241
-; LMULMAX1-RV32-NEXT: sw a2, 12(sp)
-; LMULMAX1-RV32-NEXT: sw a2, 8(sp)
-; LMULMAX1-RV32-NEXT: lui a2, 4112
-; LMULMAX1-RV32-NEXT: addi a2, a2, 257
-; LMULMAX1-RV32-NEXT: sw a2, 4(sp)
-; LMULMAX1-RV32-NEXT: sw a2, 0(sp)
-; LMULMAX1-RV32-NEXT: addi a2, sp, 24
-; LMULMAX1-RV32-NEXT: vlse64.v v10, (a2), zero
-; LMULMAX1-RV32-NEXT: addi a2, sp, 16
-; LMULMAX1-RV32-NEXT: vlse64.v v11, (a2), zero
-; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v10
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v12
+; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vand.vv v12, v9, v11
; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11
; LMULMAX1-RV32-NEXT: vadd.vv v9, v12, v9
-; LMULMAX1-RV32-NEXT: addi a2, sp, 8
-; LMULMAX1-RV32-NEXT: vlse64.v v12, (a2), zero
-; LMULMAX1-RV32-NEXT: mv a2, sp
-; LMULMAX1-RV32-NEXT: vlse64.v v13, (a2), zero
-; LMULMAX1-RV32-NEXT: vsrl.vi v14, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v14
+; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 4
+; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12
+; LMULMAX1-RV32-NEXT: lui a2, 61681
+; LMULMAX1-RV32-NEXT: addi a2, a2, -241
+; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v12, a2
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12
+; LMULMAX1-RV32-NEXT: lui a2, 4112
+; LMULMAX1-RV32-NEXT: addi a2, a2, 257
+; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v13, a2
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v13
; LMULMAX1-RV32-NEXT: li a2, 56
; LMULMAX1-RV32-NEXT: vsrl.vx v9, v9, a2
@@ -983,7 +956,6 @@ define void @ctpop_v4i64(ptr %x, ptr %y) {
; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a2
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT: addi sp, sp, 32
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index 95141df2cc5a14..0a94bbfb27c167 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -1233,39 +1233,23 @@ declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vand.vv v8, v8, v9, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v10, v8, v9, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1273,19 +1257,20 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_v2i64:
@@ -1332,39 +1317,23 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v2i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsub.vx v9, v8, a1
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vand.vv v8, v8, v9
; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v10, v8, v9
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1372,19 +1341,20 @@ define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v9, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_v2i64_unmasked:
@@ -1435,39 +1405,23 @@ declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v12, v8, v10, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1475,19 +1429,20 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_v4i64:
@@ -1534,39 +1489,23 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v4i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsub.vx v10, v8, a1
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v12, v8, v10
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1574,19 +1513,20 @@ define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v10, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_v4i64_unmasked:
@@ -1637,39 +1577,23 @@ declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v16, v8, v12, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1677,19 +1601,20 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_v8i64:
@@ -1736,39 +1661,23 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v8i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsub.vx v12, v8, a1
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v16, v8, v12
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1776,19 +1685,20 @@ define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vadd.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v12, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_v8i64_unmasked:
@@ -3978,39 +3888,23 @@ define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroex
define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vand.vv v8, v8, v9, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v10, v8, v9, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -4018,19 +3912,20 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_zero_undef_v2i64:
@@ -4077,39 +3972,23 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsub.vx v9, v8, a1
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vand.vv v8, v8, v9
; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v10, v8, v9
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -4117,19 +3996,20 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
; RV32-NEXT: vadd.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v9, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
@@ -4178,39 +4058,23 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v12, v8, v10, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -4218,19 +4082,20 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_zero_undef_v4i64:
@@ -4277,39 +4142,23 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsub.vx v10, v8, a1
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v12, v8, v10
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -4317,19 +4166,20 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
; RV32-NEXT: vadd.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v10, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
@@ -4378,39 +4228,23 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v16, v8, v12, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -4418,19 +4252,20 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_zero_undef_v8i64:
@@ -4477,39 +4312,23 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v8i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsub.vx v12, v8, a1
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v16, v8, v12
; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -4517,19 +4336,20 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
; RV32-NEXT: vadd.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v12, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: vp_cttz_zero_undef_v8i64_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 9cdcf59ba914d9..5165e716c73755 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -456,52 +456,46 @@ declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-LABEL: cttz_v2i64:
; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: li a1, 1
+; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
+; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV32I-NEXT: lui a1, 349525
; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10
+; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: lui a1, 209715
; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: lui a1, 61681
; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: lui a1, 4112
; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
-; LMULMAX2-RV32I-NEXT: li a1, 1
-; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v11, v9
-; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v8, v10
-; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v11
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: li a1, 56
; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
; LMULMAX2-RV32I-NEXT: ret
;
; LMULMAX2-RV64I-LABEL: cttz_v2i64:
@@ -1115,52 +1109,46 @@ declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-LABEL: cttz_v4i64:
; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: li a1, 1
+; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
+; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
; LMULMAX2-RV32I-NEXT: lui a1, 349525
; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12
+; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: lui a1, 209715
; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: lui a1, 61681
; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: lui a1, 4112
; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
-; LMULMAX2-RV32I-NEXT: li a1, 1
-; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v14, v10
-; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v12
-; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v12
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v14
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: li a1, 56
; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
; LMULMAX2-RV32I-NEXT: ret
;
; LMULMAX2-RV64I-LABEL: cttz_v4i64:
@@ -1723,52 +1711,46 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v2i64:
; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: li a1, 1
+; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
+; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV32I-NEXT: lui a1, 349525
; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10
+; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: lui a1, 209715
; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: lui a1, 61681
; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: lui a1, 4112
; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
-; LMULMAX2-RV32I-NEXT: li a1, 1
-; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v11, v9
-; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v8, v10
-; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v11
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9
; LMULMAX2-RV32I-NEXT: li a1, 56
; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
; LMULMAX2-RV32I-NEXT: ret
;
; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v2i64:
@@ -2342,52 +2324,46 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v4i64:
; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: li a1, 1
+; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
+; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
; LMULMAX2-RV32I-NEXT: lui a1, 349525
; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12
+; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: lui a1, 209715
; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: lui a1, 61681
; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: lui a1, 4112
; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
-; LMULMAX2-RV32I-NEXT: li a1, 1
-; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v14, v10
-; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v12
-; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v12
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v14
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
; LMULMAX2-RV32I-NEXT: li a1, 56
; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
; LMULMAX2-RV32I-NEXT: ret
;
; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v4i64: