[llvm] 319adf5 - Revert "[RISCV][InsertVSETVLI] Avoid vmv.s.x SEW toggle if at start of block"

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed May 31 10:22:48 PDT 2023


In the future, please write the reason for the revert in the commit message.
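
For example, git's default revert template leaves room for that right under the generated lines, something like:

    Revert "[RISCV][InsertVSETVLI] Avoid vmv.s.x SEW toggle if at start of block"

    This reverts commit 0ba41dd3806e658e67acb63353fd5540f2bf333c.

    <one or two sentences on what regressed, with a link to the failure if available>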

~Craig


On Wed, May 31, 2023 at 10:15 AM Luke Lau via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

>
> Author: Luke Lau
> Date: 2023-05-31T18:14:55+01:00
> New Revision: 319adf5de72a46345d0328122ebcd93c74d29592
>
> URL:
> https://github.com/llvm/llvm-project/commit/319adf5de72a46345d0328122ebcd93c74d29592
> DIFF:
> https://github.com/llvm/llvm-project/commit/319adf5de72a46345d0328122ebcd93c74d29592.diff
>
> LOG: Revert "[RISCV][InsertVSETVLI] Avoid vmv.s.x SEW toggle if at start
> of block"
>
> This reverts commit 0ba41dd3806e658e67acb63353fd5540f2bf333c.
>
> Added:
>
>
> Modified:
>     llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
>     llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
>     llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
>     llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
>     llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
>     llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
>     llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
>     llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
>     llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
>     llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
>     llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
>     llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
>     llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
>     llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
>
> Removed:
>
>
>
>
> ################################################################################
> diff  --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
> b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
> index 7ab8b3c4d0e9..b108129af52a 100644
> --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
> +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
> @@ -143,13 +143,7 @@ struct DemandedFields {
>    bool VLAny = false;
>    // Only zero vs non-zero is used. If demanded, can change non-zero values.
>    bool VLZeroness = false;
> -  // What properties of SEW we need to preserve.
> -  enum : uint8_t {
> -    SEWEqual = 2,              // The exact value of SEW needs to be preserved.
> -    SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
> -                               // than or equal to the original value.
> -    SEWNone = 0                // We don't need to preserve SEW at all.
> -  } SEW = SEWNone;
> +  bool SEW = false;
>    bool LMUL = false;
>    bool SEWLMULRatio = false;
>    bool TailPolicy = false;
> @@ -167,7 +161,7 @@ struct DemandedFields {
>
>    // Mark all VTYPE subfields and properties as demanded
>    void demandVTYPE() {
> -    SEW = SEWEqual;
> +    SEW = true;
>      LMUL = true;
>      SEWLMULRatio = true;
>      TailPolicy = true;
> @@ -192,19 +186,7 @@ struct DemandedFields {
>      OS << "{";
>      OS << "VLAny=" << VLAny << ", ";
>      OS << "VLZeroness=" << VLZeroness << ", ";
> -    OS << "SEW=";
> -    switch (SEW) {
> -    case SEWEqual:
> -      OS << "SEWEqual";
> -      break;
> -    case SEWGreaterThanOrEqual:
> -      OS << "SEWGreaterThanOrEqual";
> -      break;
> -    case SEWNone:
> -      OS << "SEWNone";
> -      break;
> -    };
> -    OS << ", ";
> +    OS << "SEW=" << SEW << ", ";
>      OS << "LMUL=" << LMUL << ", ";
>      OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
>      OS << "TailPolicy=" << TailPolicy << ", ";
> @@ -222,44 +204,41 @@ inline raw_ostream &operator<<(raw_ostream &OS,
> const DemandedFields &DF) {
>  }
>  #endif
>
> -/// Return true if moving from CurVType to NewVType is
> -/// indistinguishable from the perspective of an instruction (or set
> -/// of instructions) which use only the Used subfields and properties.
> -static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
> -                                const DemandedFields &Used) {
> -  if (Used.SEW == DemandedFields::SEWEqual &&
> -      RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
> -    return false;
>
> -  if (Used.SEW == DemandedFields::SEWGreaterThanOrEqual &&
> -      RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
> +/// Return true if the two values of the VTYPE register provided are
> +/// indistinguishable from the perspective of an instruction (or set of
> +/// instructions) which use only the Used subfields and properties.
> +static bool areCompatibleVTYPEs(uint64_t VType1,
> +                                uint64_t VType2,
> +                                const DemandedFields &Used) {
> +  if (Used.SEW &&
> +      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
>      return false;
>
>    if (Used.LMUL &&
> -      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
> +      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
>      return false;
>
>    if (Used.SEWLMULRatio) {
> -    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
> -                                              RISCVVType::getVLMUL(CurVType));
> -    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
> -                                              RISCVVType::getVLMUL(NewVType));
> +    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType1),
> +                                              RISCVVType::getVLMUL(VType1));
> +    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType2),
> +                                              RISCVVType::getVLMUL(VType2));
>      if (Ratio1 != Ratio2)
>        return false;
>    }
>
> -  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
> -                             RISCVVType::isTailAgnostic(NewVType))
> +  if (Used.TailPolicy &&
> +      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
>      return false;
> -  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
> -                             RISCVVType::isMaskAgnostic(NewVType))
> +  if (Used.MaskPolicy &&
> +      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
>      return false;
>    return true;
>  }
>
>  /// Return the fields and properties demanded by the provided instruction.
> -DemandedFields getDemanded(const MachineInstr &MI,
> -                           const MachineRegisterInfo *MRI) {
> +static DemandedFields getDemanded(const MachineInstr &MI) {
>    // Warning: This function has to work on both the lowered (i.e. post
>    // emitVSETVLIs) and pre-lowering forms.  The main implication of this is
>    // that it can't use the value of a SEW, VL, or Policy operand as they might
> @@ -291,7 +270,7 @@ DemandedFields getDemanded(const MachineInstr &MI,
>    // Note: We assume that the instructions initial SEW is the EEW encoded
>    // in the opcode.  This is asserted when constructing the VSETVLIInfo.
>    if (getEEWForLoadStore(MI)) {
> -    Res.SEW = DemandedFields::SEWNone;
> +    Res.SEW = false;
>      Res.LMUL = false;
>    }
>
> @@ -306,7 +285,7 @@ DemandedFields getDemanded(const MachineInstr &MI,
>    // * Probably ok if available VLMax is larger than demanded
>    // * The policy bits can probably be ignored..
>    if (isMaskRegOp(MI)) {
> -    Res.SEW = DemandedFields::SEWNone;
> +    Res.SEW = false;
>      Res.LMUL = false;
>    }
>
> @@ -315,17 +294,6 @@ DemandedFields getDemanded(const MachineInstr &MI,
>      Res.LMUL = false;
>      Res.SEWLMULRatio = false;
>      Res.VLAny = false;
> -    // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we don't
> -    // need to preserve any other bits and are thus compatible with any larger,
> -    // etype and can disregard policy bits.  Warning: It's tempting to try doing
> -    // this for any tail agnostic operation, but we can't as TA requires
> -    // tail lanes to either be the original value or -1.  We are writing
> -    // unknown bits to the lanes here.
> -    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
> -    if (VRegDef && VRegDef->isImplicitDef()) {
> -      Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
> -      Res.TailPolicy = false;
> -    }
>    }
>
>    return Res;
> @@ -483,7 +451,7 @@ class VSETVLIInfo {
>
>    bool hasCompatibleVTYPE(const DemandedFields &Used,
>                            const VSETVLIInfo &Require) const {
> -    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
> +    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
>    }
>
>    // Determine whether the vector instructions requirements represented by
> @@ -515,7 +483,7 @@ class VSETVLIInfo {
>      if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
>        return false;
>
> -    return hasCompatibleVTYPE(Used, Require);
> +    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
>    }
>
>    bool operator==(const VSETVLIInfo &Other) const {
> @@ -888,7 +856,22 @@ bool RISCVInsertVSETVLI::needVSETVLI(const
> MachineInstr &MI,
>    if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
>      return true;
>
> -  DemandedFields Used = getDemanded(MI, MRI);
> +  DemandedFields Used = getDemanded(MI);
> +
> +  if (isScalarMoveInstr(MI)) {
> +    // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we don't
> +    // need to preserve any other bits and are thus compatible with any larger,
> +    // etype and can disregard policy bits.  Warning: It's tempting to try doing
> +    // this for any tail agnostic operation, but we can't as TA requires
> +    // tail lanes to either be the original value or -1.  We are writing
> +    // unknown bits to the lanes here.
> +    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
> +    if (VRegDef && VRegDef->isImplicitDef() &&
> +        CurInfo.getSEW() >= Require.getSEW()) {
> +      Used.SEW = false;
> +      Used.TailPolicy = false;
> +    }
> +  }
>
>    // A slidedown/slideup with an IMPLICIT_DEF merge op can freely clobber
>    // elements not copied from the source vector (e.g. masked off, tail, or
> @@ -1324,7 +1307,7 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock
> &MBB) {
>  static void doUnion(DemandedFields &A, DemandedFields B) {
>    A.VLAny |= B.VLAny;
>    A.VLZeroness |= B.VLZeroness;
> -  A.SEW = std::max(A.SEW, B.SEW);
> +  A.SEW |= B.SEW;
>    A.LMUL |= B.LMUL;
>    A.SEWLMULRatio |= B.SEWLMULRatio;
>    A.TailPolicy |= B.TailPolicy;
> @@ -1394,7 +1377,7 @@ void
> RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
>    for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
>
>      if (!isVectorConfigInstr(MI)) {
> -      doUnion(Used, getDemanded(MI, MRI));
> +      doUnion(Used, getDemanded(MI));
>        continue;
>      }
>
> @@ -1422,7 +1405,7 @@ void
> RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
>        }
>      }
>      NextMI = &MI;
> -    Used = getDemanded(MI, MRI);
> +    Used = getDemanded(MI);
>    }
>
>    for (auto *MI : ToDelete)
>
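For context before the test updates: the functional change being backed out is the three-level SEW demand shown above. With it, a vmv.s.x or vfmv.s.f whose merge operand is an IMPLICIT_DEF only demanded a SEW greater than or equal to its own, so it could run under a wider VTYPE already in effect instead of forcing a toggle. The sketch below restates that check in isolation; it is not LLVM source: the vtype encoding is collapsed to a bare SEW value, and the small helper and main() are illustrative only.

// Standalone sketch (not LLVM code): the SEW-demand lattice the reverted
// patch added to DemandedFields, and the matching check in areCompatibleVTYPEs.
// A "vtype" is collapsed to a bare SEW value so the example compiles on its
// own; the real pass decodes VTYPE with the RISCVVType helpers.
#include <algorithm>
#include <cassert>
#include <cstdint>

enum SEWDemand : uint8_t {
  SEWNone = 0,                // SEW does not need to be preserved at all.
  SEWGreaterThanOrEqual = 1,  // Any SEW >= the original is acceptable.
  SEWEqual = 2                // The exact SEW must be preserved.
};

// Simplified stand-in for RISCVVType::getSEW().
static unsigned getSEW(uint64_t VType) { return static_cast<unsigned>(VType); }

// Mirrors the SEW portion of areCompatibleVTYPEs() from the reverted patch:
// CurVType is what the instruction requires, NewVType is the configuration
// already in effect (hasCompatibleVTYPE passed Require.encodeVTYPE() first).
static bool sewCompatible(uint64_t CurVType, uint64_t NewVType, SEWDemand Used) {
  if (Used == SEWEqual && getSEW(CurVType) != getSEW(NewVType))
    return false;
  if (Used == SEWGreaterThanOrEqual && getSEW(NewVType) < getSEW(CurVType))
    return false;
  return true;
}

int main() {
  // A vmv.s.x into an IMPLICIT_DEF only demanded SEW >= its own EEW, so an
  // e8 scalar move could run under an existing e32 configuration...
  assert(sewCompatible(/*CurVType=*/8, /*NewVType=*/32, SEWGreaterThanOrEqual));
  // ...but not under a narrower one,
  assert(!sewCompatible(/*CurVType=*/32, /*NewVType=*/8, SEWGreaterThanOrEqual));
  // while ordinary instructions still demanded an exact SEW match.
  assert(!sewCompatible(/*CurVType=*/8, /*NewVType=*/32, SEWEqual));
  // doUnion() merged demands with std::max over this ordering.
  assert(std::max(SEWGreaterThanOrEqual, SEWEqual) == SEWEqual);
  return 0;
}

The test diffs that follow show the other side of the revert: each vmv.s.x gets its own narrow vsetivli (e.g. e8, mf8) again, followed by a second vsetivli to restore the wider configuration.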
> diff  --git
> a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
> b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
> index deceb6f89c9a..37f67cad23e2 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
> @@ -182,8 +182,9 @@ define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32>
> %v1) {
>  ; CHECK-LABEL: trn2.v2i32:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 2
> -; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
>  ; CHECK-NEXT:    vrgather.vi v10, v8, 1
>  ; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
>  ; CHECK-NEXT:    vmv1r.v v8, v10
> @@ -238,8 +239,9 @@ define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64>
> %v1) {
>  ; CHECK-LABEL: trn2.v2i64:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 2
> -; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
>  ; CHECK-NEXT:    vrgather.vi v10, v8, 1
>  ; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
>  ; CHECK-NEXT:    vmv.v.v v8, v10
> @@ -262,8 +264,9 @@ define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x
> float> %v1) {
>  ; CHECK-LABEL: trn2.v2f32:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 2
> -; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
>  ; CHECK-NEXT:    vrgather.vi v10, v8, 1
>  ; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
>  ; CHECK-NEXT:    vmv1r.v v8, v10
> @@ -318,8 +321,9 @@ define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x
> double> %v1) {
>  ; CHECK-LABEL: trn2.v2f64:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 2
> -; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
>  ; CHECK-NEXT:    vrgather.vi v10, v8, 1
>  ; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
>  ; CHECK-NEXT:    vmv.v.v v8, v10
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
> b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
> index 8bcb74c31c89..eb45b3cf3354 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
> @@ -1400,8 +1400,9 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va,
> <2 x i1> %m, i32 zeroext %e
>  ; RV32-NEXT:    vand.vx v11, v11, a4, v0.t
>  ; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
>  ; RV32-NEXT:    li a5, 5
> -; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a5
> +; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV32-NEXT:    vmv.v.i v13, 0
>  ; RV32-NEXT:    lui a5, 1044480
>  ; RV32-NEXT:    vmerge.vxm v13, v13, a5, v0
> @@ -1524,8 +1525,9 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x
> i64> %va, i32 zeroext %evl)
>  ; RV32-NEXT:    vor.vv v9, v10, v9
>  ; RV32-NEXT:    vsrl.vi v10, v8, 8
>  ; RV32-NEXT:    li a4, 5
> -; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a4
> +; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV32-NEXT:    vmv.v.i v11, 0
>  ; RV32-NEXT:    lui a4, 1044480
>  ; RV32-NEXT:    vmerge.vxm v11, v11, a4, v0
> @@ -1658,8 +1660,9 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va,
> <4 x i1> %m, i32 zeroext %e
>  ; RV32-NEXT:    vand.vx v14, v14, a4, v0.t
>  ; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
>  ; RV32-NEXT:    li a5, 85
> -; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a5
> +; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
>  ; RV32-NEXT:    vmv.v.i v18, 0
>  ; RV32-NEXT:    lui a5, 1044480
>  ; RV32-NEXT:    vmerge.vxm v18, v18, a5, v0
> @@ -1782,8 +1785,9 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x
> i64> %va, i32 zeroext %evl)
>  ; RV32-NEXT:    vor.vv v10, v12, v10
>  ; RV32-NEXT:    vsrl.vi v12, v8, 8
>  ; RV32-NEXT:    li a4, 85
> -; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a4
> +; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
>  ; RV32-NEXT:    vmv.v.i v14, 0
>  ; RV32-NEXT:    lui a4, 1044480
>  ; RV32-NEXT:    vmerge.vxm v14, v14, a4, v0
> @@ -1917,8 +1921,9 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va,
> <8 x i1> %m, i32 zeroext %e
>  ; RV32-NEXT:    vsrl.vi v28, v8, 8, v0.t
>  ; RV32-NEXT:    lui a5, 5
>  ; RV32-NEXT:    addi a5, a5, 1365
> -; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a5
> +; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
>  ; RV32-NEXT:    vmv.v.i v20, 0
>  ; RV32-NEXT:    lui a5, 1044480
>  ; RV32-NEXT:    vmerge.vxm v20, v20, a5, v0
> @@ -2042,8 +2047,9 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x
> i64> %va, i32 zeroext %evl)
>  ; RV32-NEXT:    vsrl.vi v20, v8, 8
>  ; RV32-NEXT:    lui a4, 5
>  ; RV32-NEXT:    addi a4, a4, 1365
> -; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a4
> +; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
>  ; RV32-NEXT:    vmv.v.i v16, 0
>  ; RV32-NEXT:    lui a4, 1044480
>  ; RV32-NEXT:    vmerge.vxm v16, v16, a4, v0
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
> b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
> index 590c631c541e..9280565c3192 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
> @@ -440,8 +440,9 @@ define <2 x i64> @vp_bswap_v2i64(<2 x i64> %va, <2 x
> i1> %m, i32 zeroext %evl) {
>  ; RV32-NEXT:    vand.vx v11, v11, a4, v0.t
>  ; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
>  ; RV32-NEXT:    li a5, 5
> -; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a5
> +; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV32-NEXT:    vmv.v.i v13, 0
>  ; RV32-NEXT:    lui a5, 1044480
>  ; RV32-NEXT:    vmerge.vxm v13, v13, a5, v0
> @@ -513,8 +514,9 @@ define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64>
> %va, i32 zeroext %evl) {
>  ; RV32-NEXT:    vor.vv v9, v10, v9
>  ; RV32-NEXT:    vsrl.vi v10, v8, 8
>  ; RV32-NEXT:    li a4, 5
> -; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a4
> +; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV32-NEXT:    vmv.v.i v11, 0
>  ; RV32-NEXT:    lui a4, 1044480
>  ; RV32-NEXT:    vmerge.vxm v11, v11, a4, v0
> @@ -596,8 +598,9 @@ define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x
> i1> %m, i32 zeroext %evl) {
>  ; RV32-NEXT:    vand.vx v14, v14, a4, v0.t
>  ; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
>  ; RV32-NEXT:    li a5, 85
> -; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a5
> +; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
>  ; RV32-NEXT:    vmv.v.i v18, 0
>  ; RV32-NEXT:    lui a5, 1044480
>  ; RV32-NEXT:    vmerge.vxm v18, v18, a5, v0
> @@ -669,8 +672,9 @@ define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64>
> %va, i32 zeroext %evl) {
>  ; RV32-NEXT:    vor.vv v10, v12, v10
>  ; RV32-NEXT:    vsrl.vi v12, v8, 8
>  ; RV32-NEXT:    li a4, 85
> -; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a4
> +; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
>  ; RV32-NEXT:    vmv.v.i v14, 0
>  ; RV32-NEXT:    lui a4, 1044480
>  ; RV32-NEXT:    vmerge.vxm v14, v14, a4, v0
> @@ -753,8 +757,9 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x
> i1> %m, i32 zeroext %evl) {
>  ; RV32-NEXT:    vsrl.vi v28, v8, 8, v0.t
>  ; RV32-NEXT:    lui a5, 5
>  ; RV32-NEXT:    addi a5, a5, 1365
> -; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a5
> +; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
>  ; RV32-NEXT:    vmv.v.i v20, 0
>  ; RV32-NEXT:    lui a5, 1044480
>  ; RV32-NEXT:    vmerge.vxm v20, v20, a5, v0
> @@ -827,8 +832,9 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64>
> %va, i32 zeroext %evl) {
>  ; RV32-NEXT:    vsrl.vi v20, v8, 8
>  ; RV32-NEXT:    lui a4, 5
>  ; RV32-NEXT:    addi a4, a4, 1365
> -; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a4
> +; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
>  ; RV32-NEXT:    vmv.v.i v16, 0
>  ; RV32-NEXT:    lui a4, 1044480
>  ; RV32-NEXT:    vmerge.vxm v16, v16, a4, v0
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
> b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
> index 3418f8660202..30ebcf7651f2 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
> @@ -36,8 +36,9 @@ define <4 x float>
> @hang_when_merging_stores_after_legalization(<8 x float> %x,
>  ; LMULMAX1-LABEL: hang_when_merging_stores_after_legalization:
>  ; LMULMAX1:       # %bb.0:
>  ; LMULMAX1-NEXT:    li a0, 2
> -; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
> +; LMULMAX1-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; LMULMAX1-NEXT:    vmv.s.x v0, a0
> +; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
>  ; LMULMAX1-NEXT:    vrgather.vi v12, v8, 0
>  ; LMULMAX1-NEXT:    vrgather.vi v12, v9, 3, v0.t
>  ; LMULMAX1-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
> @@ -151,8 +152,9 @@ define void @buildvec_merge0_v4f32(<4 x float>* %x,
> float %f) {
>  ; CHECK-LABEL: buildvec_merge0_v4f32:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a1, 6
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a1
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.v.f v8, fa0
>  ; CHECK-NEXT:    lui a1, 262144
>  ; CHECK-NEXT:    vmerge.vxm v8, v8, a1, v0
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
> b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
> index 944dea21335e..4d7d08e14f0a 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
> @@ -6,8 +6,9 @@ define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half>
> %y) {
>  ; CHECK-LABEL: shuffle_v4f16:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 11
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
>  ; CHECK-NEXT:    ret
>    %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 0, i32
> 1, i32 6, i32 3>
> @@ -18,8 +19,9 @@ define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x
> float> %y) {
>  ; CHECK-LABEL: shuffle_v8f32:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 236
> -; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
>  ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
>  ; CHECK-NEXT:    ret
>    %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 8,
> i32 9, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
> @@ -32,8 +34,9 @@ define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
>  ; RV32-NEXT:    li a0, 9
>  ; RV32-NEXT:    lui a1, %hi(.LCPI2_0)
>  ; RV32-NEXT:    fld fa5, %lo(.LCPI2_0)(a1)
> -; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a0
> +; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
>  ; RV32-NEXT:    vfmerge.vfm v8, v8, fa5, v0
>  ; RV32-NEXT:    ret
>  ;
> @@ -42,8 +45,9 @@ define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
>  ; RV64-NEXT:    lui a0, %hi(.LCPI2_0)
>  ; RV64-NEXT:    fld fa5, %lo(.LCPI2_0)(a0)
>  ; RV64-NEXT:    li a0, 9
> -; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
> +; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV64-NEXT:    vmv.s.x v0, a0
> +; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
>  ; RV64-NEXT:    vfmerge.vfm v8, v8, fa5, v0
>  ; RV64-NEXT:    ret
>    %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0,
> double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
> @@ -56,8 +60,9 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
>  ; RV32-NEXT:    li a0, 6
>  ; RV32-NEXT:    lui a1, %hi(.LCPI3_0)
>  ; RV32-NEXT:    fld fa5, %lo(.LCPI3_0)(a1)
> -; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a0
> +; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
>  ; RV32-NEXT:    vfmerge.vfm v8, v8, fa5, v0
>  ; RV32-NEXT:    ret
>  ;
> @@ -66,8 +71,9 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
>  ; RV64-NEXT:    lui a0, %hi(.LCPI3_0)
>  ; RV64-NEXT:    fld fa5, %lo(.LCPI3_0)(a0)
>  ; RV64-NEXT:    li a0, 6
> -; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
> +; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV64-NEXT:    vmv.s.x v0, a0
> +; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
>  ; RV64-NEXT:    vfmerge.vfm v8, v8, fa5, v0
>  ; RV64-NEXT:    ret
>    %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double
> 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
> @@ -156,8 +162,9 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x
> double> %x) {
>  ; RV32-LABEL: vrgather_shuffle_xv_v4f64:
>  ; RV32:       # %bb.0:
>  ; RV32-NEXT:    li a0, 12
> -; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
> +; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; RV32-NEXT:    vmv.s.x v0, a0
> +; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; RV32-NEXT:    lui a0, %hi(.LCPI7_0)
>  ; RV32-NEXT:    addi a0, a0, %lo(.LCPI7_0)
>  ; RV32-NEXT:    vlse64.v v10, (a0), zero
> @@ -170,12 +177,13 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x
> double> %x) {
>  ;
>  ; RV64-LABEL: vrgather_shuffle_xv_v4f64:
>  ; RV64:       # %bb.0:
> +; RV64-NEXT:    li a0, 12
> +; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
> +; RV64-NEXT:    vmv.s.x v0, a0
>  ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
>  ; RV64-NEXT:    lui a0, %hi(.LCPI7_0)
>  ; RV64-NEXT:    addi a0, a0, %lo(.LCPI7_0)
>  ; RV64-NEXT:    vlse64.v v10, (a0), zero
> -; RV64-NEXT:    li a0, 12
> -; RV64-NEXT:    vmv.s.x v0, a0
>  ; RV64-NEXT:    vid.v v12
>  ; RV64-NEXT:    vrsub.vi v12, v12, 4
>  ; RV64-NEXT:    vrgather.vv v10, v8, v12, v0.t
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
> b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
> index 3c9a052f65e2..5f07294c6c30 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
> @@ -6,8 +6,9 @@ define <4 x i16> @shuffle_v4i16(<4 x i16> %x, <4 x i16>
> %y) {
>  ; CHECK-LABEL: shuffle_v4i16:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 11
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
>  ; CHECK-NEXT:    ret
>    %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 0, i32 1,
> i32 6, i32 3>
> @@ -18,8 +19,9 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %x, <8 x i32>
> %y) {
>  ; CHECK-LABEL: shuffle_v8i32:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 203
> -; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
>  ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
>  ; CHECK-NEXT:    ret
>    %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1,
> i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
> @@ -30,8 +32,9 @@ define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) {
>  ; CHECK-LABEL: shuffle_xv_v4i16:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 9
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
>  ; CHECK-NEXT:    ret
>    %s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16>
> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
> @@ -42,8 +45,9 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
>  ; CHECK-LABEL: shuffle_vx_v4i16:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 6
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
>  ; CHECK-NEXT:    ret
>    %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16
> 5>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
> @@ -99,8 +103,9 @@ define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16>
> %x) {
>  ; CHECK-LABEL: vrgather_shuffle_xv_v4i16:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 12
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
> +; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v0, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
>  ; CHECK-NEXT:    vid.v v9
>  ; CHECK-NEXT:    vrsub.vi v10, v9, 4
>  ; CHECK-NEXT:    vmv.v.i v9, 5
> @@ -233,12 +238,13 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x
> i64> %x) {
>  ;
>  ; RV64-LABEL: vrgather_shuffle_xv_v8i64:
>  ; RV64:       # %bb.0:
> -; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
> +; RV64-NEXT:    li a0, 113
> +; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
> +; RV64-NEXT:    vmv.s.x v0, a0
>  ; RV64-NEXT:    lui a0, %hi(.LCPI12_0)
>  ; RV64-NEXT:    addi a0, a0, %lo(.LCPI12_0)
> +; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
>  ; RV64-NEXT:    vle64.v v16, (a0)
> -; RV64-NEXT:    li a0, 113
> -; RV64-NEXT:    vmv.s.x v0, a0
>  ; RV64-NEXT:    vmv.v.i v12, -1
>  ; RV64-NEXT:    vrgather.vv v12, v8, v16, v0.t
>  ; RV64-NEXT:    vmv.v.v v8, v12
> @@ -267,12 +273,13 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x
> i64> %x) {
>  ;
>  ; RV64-LABEL: vrgather_shuffle_vx_v8i64:
>  ; RV64:       # %bb.0:
> -; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
> +; RV64-NEXT:    li a0, 115
> +; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
> +; RV64-NEXT:    vmv.s.x v0, a0
>  ; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
>  ; RV64-NEXT:    addi a0, a0, %lo(.LCPI13_0)
> +; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
>  ; RV64-NEXT:    vle64.v v16, (a0)
> -; RV64-NEXT:    li a0, 115
> -; RV64-NEXT:    vmv.s.x v0, a0
>  ; RV64-NEXT:    vmv.v.i v12, 5
>  ; RV64-NEXT:    vrgather.vv v12, v8, v16, v0.t
>  ; RV64-NEXT:    vmv.v.v v8, v12
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
> b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
> index baf5f9c98826..65055b01946e 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
> @@ -230,8 +230,9 @@ define void @splat_v4i64(ptr %x, i64 %y) {
>  ; LMULMAX1-RV32-LABEL: splat_v4i64:
>  ; LMULMAX1-RV32:       # %bb.0:
>  ; LMULMAX1-RV32-NEXT:    li a3, 5
> -; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
> +; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
>  ; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a3
> +; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; LMULMAX1-RV32-NEXT:    vmv.v.x v8, a2
>  ; LMULMAX1-RV32-NEXT:    vmerge.vxm v8, v8, a1, v0
>  ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
> b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
> index f4cd2ea48d9d..9ef4bb1dfbf0 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
> @@ -745,11 +745,12 @@ define <128 x i1> @buildvec_mask_v128i1() {
>  ; RV32-LMULMAX8:       # %bb.0:
>  ; RV32-LMULMAX8-NEXT:    lui a0, 748388
>  ; RV32-LMULMAX8-NEXT:    addi a0, a0, -1793
> -; RV32-LMULMAX8-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
> +; RV32-LMULMAX8-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV32-LMULMAX8-NEXT:    vmv.s.x v8, a0
>  ; RV32-LMULMAX8-NEXT:    lui a0, 748384
>  ; RV32-LMULMAX8-NEXT:    addi a0, a0, 1776
>  ; RV32-LMULMAX8-NEXT:    vmv.s.x v0, a0
> +; RV32-LMULMAX8-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
>  ; RV32-LMULMAX8-NEXT:    vslideup.vi v0, v8, 1
>  ; RV32-LMULMAX8-NEXT:    lui a0, 551776
>  ; RV32-LMULMAX8-NEXT:    addi a0, a0, 1776
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
> b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
> index a3b808dea13d..472efcc0c16b 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
> @@ -477,16 +477,18 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x
> i1> %m, <4 x i8> %passthru)
>  ; RV64ZVE32F-NEXT:  .LBB8_6: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB8_3
>  ; RV64ZVE32F-NEXT:  .LBB8_7: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, mf4, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, mf4, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a1, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB8_4
> @@ -543,16 +545,18 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr>
> %ptrs, <4 x i8> %passthru) {
>  ; RV64ZVE32F-NEXT:  .LBB9_6: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB9_3
>  ; RV64ZVE32F-NEXT:  .LBB9_7: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, mf4, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, mf4, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a1, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB9_4
> @@ -643,48 +647,54 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x
> i1> %m, <8 x i8> %passthru)
>  ; RV64ZVE32F-NEXT:  .LBB11_10: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB11_3
>  ; RV64ZVE32F-NEXT:  .LBB11_11: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB11_4
>  ; RV64ZVE32F-NEXT:  .LBB11_12: # %cond.load7
>  ; RV64ZVE32F-NEXT:    ld a2, 24(a0)
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB11_5
>  ; RV64ZVE32F-NEXT:  .LBB11_13: # %cond.load10
>  ; RV64ZVE32F-NEXT:    ld a2, 32(a0)
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB11_6
>  ; RV64ZVE32F-NEXT:  .LBB11_14: # %cond.load13
>  ; RV64ZVE32F-NEXT:    ld a2, 40(a0)
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 5
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 64
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB11_7
>  ; RV64ZVE32F-NEXT:  .LBB11_15: # %cond.load16
>  ; RV64ZVE32F-NEXT:    ld a2, 48(a0)
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e8, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e8, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB11_8
> @@ -801,8 +811,9 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x
> i8> %idxs, <8 x i1> %m, <8
>  ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB12_9
> @@ -1199,16 +1210,18 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs,
> <4 x i1> %m, <4 x i16> %passthr
>  ; RV64ZVE32F-NEXT:  .LBB19_6: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB19_3
>  ; RV64ZVE32F-NEXT:  .LBB19_7: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a1, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB19_4
> @@ -1265,16 +1278,18 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x ptr>
> %ptrs, <4 x i16> %passthru) {
>  ; RV64ZVE32F-NEXT:  .LBB20_6: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB20_3
>  ; RV64ZVE32F-NEXT:  .LBB20_7: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a1, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB20_4
> @@ -1365,48 +1380,54 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs,
> <8 x i1> %m, <8 x i16> %passthr
>  ; RV64ZVE32F-NEXT:  .LBB22_10: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB22_3
>  ; RV64ZVE32F-NEXT:  .LBB22_11: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB22_4
>  ; RV64ZVE32F-NEXT:  .LBB22_12: # %cond.load7
>  ; RV64ZVE32F-NEXT:    ld a2, 24(a0)
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB22_5
>  ; RV64ZVE32F-NEXT:  .LBB22_13: # %cond.load10
>  ; RV64ZVE32F-NEXT:    ld a2, 32(a0)
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB22_6
>  ; RV64ZVE32F-NEXT:  .LBB22_14: # %cond.load13
>  ; RV64ZVE32F-NEXT:    ld a2, 40(a0)
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 5
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 64
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB22_7
>  ; RV64ZVE32F-NEXT:  .LBB22_15: # %cond.load16
>  ; RV64ZVE32F-NEXT:    ld a2, 48(a0)
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB22_8
> @@ -1465,8 +1486,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr
> %base, <8 x i8> %idxs, <8 x i1>
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
>  ; RV64ZVE32F-NEXT:  .LBB23_4: # %else2
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -1478,8 +1500,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr
> %base, <8 x i8> %idxs, <8 x i1>
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
>  ; RV64ZVE32F-NEXT:  .LBB23_6: # %else5
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
> @@ -1499,8 +1522,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr
> %base, <8 x i8> %idxs, <8 x i1>
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 5
>  ; RV64ZVE32F-NEXT:  .LBB23_10: # %else14
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -1520,8 +1544,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr
> %base, <8 x i8> %idxs, <8 x i1>
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB23_8
> @@ -1531,8 +1556,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr
> %base, <8 x i8> %idxs, <8 x i1>
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB23_9
> @@ -1542,8 +1568,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr
> %base, <8 x i8> %idxs, <8 x i1>
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB23_12
> @@ -1608,8 +1635,9 @@ define <8 x i16>
> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
>  ; RV64ZVE32F-NEXT:  .LBB24_4: # %else2
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -1621,8 +1649,9 @@ define <8 x i16>
> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
>  ; RV64ZVE32F-NEXT:  .LBB24_6: # %else5
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
> @@ -1642,8 +1671,9 @@ define <8 x i16>
> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 5
>  ; RV64ZVE32F-NEXT:  .LBB24_10: # %else14
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -1663,8 +1693,9 @@ define <8 x i16>
> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB24_8
> @@ -1674,8 +1705,9 @@ define <8 x i16>
> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB24_9
> @@ -1685,8 +1717,9 @@ define <8 x i16>
> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB24_12
> @@ -1754,8 +1787,9 @@ define <8 x i16>
> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
>  ; RV64ZVE32F-NEXT:  .LBB25_4: # %else2
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -1768,8 +1802,9 @@ define <8 x i16>
> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
>  ; RV64ZVE32F-NEXT:  .LBB25_6: # %else5
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
> @@ -1790,8 +1825,9 @@ define <8 x i16>
> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 5
>  ; RV64ZVE32F-NEXT:  .LBB25_10: # %else14
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -1812,8 +1848,9 @@ define <8 x i16>
> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB25_8
> @@ -1824,8 +1861,9 @@ define <8 x i16>
> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB25_9
> @@ -1836,8 +1874,9 @@ define <8 x i16>
> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB25_12
> @@ -1971,8 +2010,9 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base,
> <8 x i16> %idxs, <8 x i1> %m,
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB26_9
> @@ -2263,16 +2303,18 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs,
> <4 x i1> %m, <4 x i32> %passthr
>  ; RV64ZVE32F-NEXT:  .LBB31_6: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB31_3
>  ; RV64ZVE32F-NEXT:  .LBB31_7: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a1, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB31_4
> @@ -2328,16 +2370,18 @@ define <4 x i32> @mgather_truemask_v4i32(<4 x ptr>
> %ptrs, <4 x i32> %passthru) {
>  ; RV64ZVE32F-NEXT:  .LBB32_6: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB32_3
>  ; RV64ZVE32F-NEXT:  .LBB32_7: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a1, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB32_4
> @@ -2428,48 +2472,54 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs,
> <8 x i1> %m, <8 x i32> %passthr
>  ; RV64ZVE32F-NEXT:  .LBB34_10: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB34_3
>  ; RV64ZVE32F-NEXT:  .LBB34_11: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 2
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB34_4
>  ; RV64ZVE32F-NEXT:  .LBB34_12: # %cond.load7
>  ; RV64ZVE32F-NEXT:    ld a2, 24(a0)
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB34_5
>  ; RV64ZVE32F-NEXT:  .LBB34_13: # %cond.load10
>  ; RV64ZVE32F-NEXT:    ld a2, 32(a0)
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB34_6
>  ; RV64ZVE32F-NEXT:  .LBB34_14: # %cond.load13
>  ; RV64ZVE32F-NEXT:    ld a2, 40(a0)
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 5
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 64
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB34_7
>  ; RV64ZVE32F-NEXT:  .LBB34_15: # %cond.load16
>  ; RV64ZVE32F-NEXT:    ld a2, 48(a0)
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB34_8
> @@ -2597,8 +2647,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr
> %base, <8 x i8> %idxs, <8 x i1>
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB35_9
> @@ -2745,8 +2796,9 @@ define <8 x i32>
> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB36_9
> @@ -2900,8 +2952,9 @@ define <8 x i32>
> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB37_9
> @@ -3052,8 +3105,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr
> %base, <8 x i16> %idxs, <8 x i
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB38_9
> @@ -3201,8 +3255,9 @@ define <8 x i32>
> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB39_9
> @@ -3359,8 +3414,9 @@ define <8 x i32>
> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
>  ; RV64ZVE32F-NEXT:    slli a3, a3, 2
>  ; RV64ZVE32F-NEXT:    add a3, a0, a3
>  ; RV64ZVE32F-NEXT:    lw a3, 0(a3)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v12, a3
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a3, a2, 32
>  ; RV64ZVE32F-NEXT:    bnez a3, .LBB40_9
> @@ -3503,13 +3559,13 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base,
> <8 x i32> %idxs, <8 x i1> %m,
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB41_7
>  ; RV64ZVE32F-NEXT:  .LBB41_14: # %cond.load10
> -; RV64ZVE32F-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB41_8
> @@ -7138,16 +7194,18 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs,
> <4 x i1> %m, <4 x half> %passt
>  ; RV64ZVE32F-NEXT:  .LBB60_6: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB60_3
>  ; RV64ZVE32F-NEXT:  .LBB60_7: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a1, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB60_4
> @@ -7204,16 +7262,18 @@ define <4 x half> @mgather_truemask_v4f16(<4 x
> ptr> %ptrs, <4 x half> %passthru)
>  ; RV64ZVE32F-NEXT:  .LBB61_6: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB61_3
>  ; RV64ZVE32F-NEXT:  .LBB61_7: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a1, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB61_4
> @@ -7304,48 +7364,54 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs,
> <8 x i1> %m, <8 x half> %passt
>  ; RV64ZVE32F-NEXT:  .LBB63_10: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB63_3
>  ; RV64ZVE32F-NEXT:  .LBB63_11: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB63_4
>  ; RV64ZVE32F-NEXT:  .LBB63_12: # %cond.load7
>  ; RV64ZVE32F-NEXT:    ld a2, 24(a0)
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB63_5
>  ; RV64ZVE32F-NEXT:  .LBB63_13: # %cond.load10
>  ; RV64ZVE32F-NEXT:    ld a2, 32(a0)
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB63_6
>  ; RV64ZVE32F-NEXT:  .LBB63_14: # %cond.load13
>  ; RV64ZVE32F-NEXT:    ld a2, 40(a0)
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 5
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 64
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB63_7
>  ; RV64ZVE32F-NEXT:  .LBB63_15: # %cond.load16
>  ; RV64ZVE32F-NEXT:    ld a2, 48(a0)
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB63_8
> @@ -7404,8 +7470,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr
> %base, <8 x i8> %idxs, <8 x i1
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
>  ; RV64ZVE32F-NEXT:  .LBB64_4: # %else2
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -7417,8 +7484,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr
> %base, <8 x i8> %idxs, <8 x i1
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v11, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
>  ; RV64ZVE32F-NEXT:  .LBB64_6: # %else5
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
> @@ -7438,8 +7506,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr
> %base, <8 x i8> %idxs, <8 x i1
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 5
>  ; RV64ZVE32F-NEXT:  .LBB64_10: # %else14
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -7459,8 +7528,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr
> %base, <8 x i8> %idxs, <8 x i1
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB64_8
> @@ -7470,8 +7540,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr
> %base, <8 x i8> %idxs, <8 x i1
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB64_9
> @@ -7481,8 +7552,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr
> %base, <8 x i8> %idxs, <8 x i1
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB64_12
> @@ -7547,8 +7619,9 @@ define <8 x half>
> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
>  ; RV64ZVE32F-NEXT:  .LBB65_4: # %else2
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -7560,8 +7633,9 @@ define <8 x half>
> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v11, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
>  ; RV64ZVE32F-NEXT:  .LBB65_6: # %else5
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
> @@ -7581,8 +7655,9 @@ define <8 x half>
> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 5
>  ; RV64ZVE32F-NEXT:  .LBB65_10: # %else14
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -7602,8 +7677,9 @@ define <8 x half>
> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB65_8
> @@ -7613,8 +7689,9 @@ define <8 x half>
> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB65_9
> @@ -7624,8 +7701,9 @@ define <8 x half>
> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB65_12
> @@ -7693,8 +7771,9 @@ define <8 x half>
> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
>  ; RV64ZVE32F-NEXT:  .LBB66_4: # %else2
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -7707,8 +7786,9 @@ define <8 x half>
> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v11, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
>  ; RV64ZVE32F-NEXT:  .LBB66_6: # %else5
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
> @@ -7729,8 +7809,9 @@ define <8 x half>
> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 5
>  ; RV64ZVE32F-NEXT:  .LBB66_10: # %else14
>  ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
> @@ -7751,8 +7832,9 @@ define <8 x half>
> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB66_8
> @@ -7763,8 +7845,9 @@ define <8 x half>
> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB66_9
> @@ -7775,8 +7858,9 @@ define <8 x half>
> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB66_12
> @@ -7910,8 +7994,9 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base,
> <8 x i16> %idxs, <8 x i1> %m
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 1
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB67_9
> @@ -8076,16 +8161,18 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs,
> <4 x i1> %m, <4 x float> %pas
>  ; RV64ZVE32F-NEXT:  .LBB70_6: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB70_3
>  ; RV64ZVE32F-NEXT:  .LBB70_7: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a1, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB70_4
> @@ -8141,16 +8228,18 @@ define <4 x float> @mgather_truemask_v4f32(<4 x
> ptr> %ptrs, <4 x float> %passthr
>  ; RV64ZVE32F-NEXT:  .LBB71_6: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB71_3
>  ; RV64ZVE32F-NEXT:  .LBB71_7: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
>  ; RV64ZVE32F-NEXT:    andi a1, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB71_4
> @@ -8241,48 +8330,54 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs,
> <8 x i1> %m, <8 x float> %pas
>  ; RV64ZVE32F-NEXT:  .LBB73_10: # %cond.load1
>  ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 1
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 4
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB73_3
>  ; RV64ZVE32F-NEXT:  .LBB73_11: # %cond.load4
>  ; RV64ZVE32F-NEXT:    ld a2, 16(a0)
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 2
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 8
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB73_4
>  ; RV64ZVE32F-NEXT:  .LBB73_12: # %cond.load7
>  ; RV64ZVE32F-NEXT:    ld a2, 24(a0)
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 3
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB73_5
>  ; RV64ZVE32F-NEXT:  .LBB73_13: # %cond.load10
>  ; RV64ZVE32F-NEXT:    ld a2, 32(a0)
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB73_6
>  ; RV64ZVE32F-NEXT:  .LBB73_14: # %cond.load13
>  ; RV64ZVE32F-NEXT:    ld a2, 40(a0)
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 5
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 64
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB73_7
>  ; RV64ZVE32F-NEXT:  .LBB73_15: # %cond.load16
>  ; RV64ZVE32F-NEXT:    ld a2, 48(a0)
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 6
>  ; RV64ZVE32F-NEXT:    andi a1, a1, -128
>  ; RV64ZVE32F-NEXT:    beqz a1, .LBB73_8
> @@ -8410,8 +8505,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr
> %base, <8 x i8> %idxs, <8 x i
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB74_9
> @@ -8558,8 +8654,9 @@ define <8 x float>
> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB75_9
> @@ -8713,8 +8810,9 @@ define <8 x float>
> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB76_9
> @@ -8865,8 +8963,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr
> %base, <8 x i16> %idxs, <8 x
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB77_9
> @@ -9014,8 +9113,9 @@ define <8 x float>
> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB78_9
> @@ -9172,8 +9272,9 @@ define <8 x float>
> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
>  ; RV64ZVE32F-NEXT:    slli a3, a3, 2
>  ; RV64ZVE32F-NEXT:    add a3, a0, a3
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a3)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
>  ; RV64ZVE32F-NEXT:    andi a3, a2, 32
>  ; RV64ZVE32F-NEXT:    bnez a3, .LBB79_9
> @@ -9316,13 +9417,13 @@ define <8 x float> @mgather_baseidx_v8f32(ptr
> %base, <8 x i32> %idxs, <8 x i1> %
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 16
>  ; RV64ZVE32F-NEXT:    beqz a2, .LBB80_7
>  ; RV64ZVE32F-NEXT:  .LBB80_14: # %cond.load10
> -; RV64ZVE32F-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
>  ; RV64ZVE32F-NEXT:    slli a2, a2, 2
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB80_8
> @@ -12280,8 +12381,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base,
> <16 x i8> %idxs, <16 x i1> %m
>  ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 4
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 32
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB97_9
> @@ -12311,8 +12413,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base,
> <16 x i8> %idxs, <16 x i1> %m
>  ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 9, e8, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 9, e8, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 8
>  ; RV64ZVE32F-NEXT:    andi a2, a1, 512
>  ; RV64ZVE32F-NEXT:    bnez a2, .LBB97_14
> @@ -12333,8 +12436,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base,
> <16 x i8> %idxs, <16 x i1> %m
>  ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
>  ; RV64ZVE32F-NEXT:    add a2, a0, a2
>  ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
> -; RV64ZVE32F-NEXT:    vsetivli zero, 13, e8, m1, tu, ma
> +; RV64ZVE32F-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
>  ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
> +; RV64ZVE32F-NEXT:    vsetivli zero, 13, e8, m1, tu, ma
>  ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 12
>  ; RV64ZVE32F-NEXT:    slli a2, a1, 50
>  ; RV64ZVE32F-NEXT:    bltz a2, .LBB97_20
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
> b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
> index 2ae3dad22fac..c54ca19c36ad 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
> @@ -472,8 +472,9 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -481,8 +482,9 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -629,8 +631,9 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -638,8 +641,9 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -795,8 +799,9 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -804,8 +809,9 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -1420,8 +1426,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s6
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -1429,8 +1436,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s5
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -1438,8 +1446,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s4
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
> @@ -1447,8 +1456,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s3
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
> @@ -1456,8 +1466,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 5
> @@ -1465,8 +1476,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 6
> @@ -1702,8 +1714,9 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s6
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -1711,8 +1724,9 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s5
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -1720,8 +1734,9 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s4
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
> @@ -1729,8 +1744,9 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s3
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
> @@ -1738,8 +1754,9 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 5
> @@ -1747,8 +1764,9 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 6
> @@ -2004,8 +2022,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s6
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -2013,8 +2032,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s5
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -2022,8 +2042,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s4
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
> @@ -2031,8 +2052,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s3
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
> @@ -2040,8 +2062,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 5
> @@ -2049,8 +2072,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 6
> @@ -3756,8 +3780,9 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -3765,8 +3790,9 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -3911,8 +3937,9 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -3920,8 +3947,9 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -4076,8 +4104,9 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -4085,8 +4114,9 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -4689,8 +4719,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s6
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -4698,8 +4729,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s5
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -4707,8 +4739,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s4
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
> @@ -4716,8 +4749,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s3
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
> @@ -4725,8 +4759,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 5
> @@ -4734,8 +4769,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 6
> @@ -4967,8 +5003,9 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s6
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -4976,8 +5013,9 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s5
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -4985,8 +5023,9 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s4
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
> @@ -4994,8 +5033,9 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s3
>  ; CHECK-V-NEXT:    call __extendhfsf2@plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
> @@ -5003,8 +5043,9 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2 at plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 5
> @@ -5012,8 +5053,9 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2 at plt
>  ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 6
> @@ -5268,8 +5310,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s6
>  ; CHECK-V-NEXT:    call __extendhfsf2 at plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
> @@ -5277,8 +5320,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s5
>  ; CHECK-V-NEXT:    call __extendhfsf2 at plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
> @@ -5286,8 +5330,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s4
>  ; CHECK-V-NEXT:    call __extendhfsf2 at plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
> @@ -5295,8 +5340,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s3
>  ; CHECK-V-NEXT:    call __extendhfsf2 at plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
> @@ -5304,8 +5350,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s2
>  ; CHECK-V-NEXT:    call __extendhfsf2 at plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 5
> @@ -5313,8 +5360,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
>  ; CHECK-V-NEXT:    mv a0, s1
>  ; CHECK-V-NEXT:    call __extendhfsf2 at plt
>  ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
> -; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
> +; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
>  ; CHECK-V-NEXT:    vmv.s.x v8, a0
> +; CHECK-V-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
>  ; CHECK-V-NEXT:    addi a0, sp, 16
>  ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
>  ; CHECK-V-NEXT:    vslideup.vi v10, v8, 6
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
> b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
> index 97a73be5f55a..ef89ec7112a5 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
> @@ -17,8 +17,9 @@ define <vscale x 1 x half> @insertelt_nxv1f16_0(<vscale
> x 1 x half> %v, half %el
>  define <vscale x 1 x half> @insertelt_nxv1f16_imm(<vscale x 1 x half> %v,
> half %elt) {
>  ; CHECK-LABEL: insertelt_nxv1f16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v9, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 1 x half> %v, half %elt, i32 3
> @@ -51,8 +52,9 @@ define <vscale x 2 x half> @insertelt_nxv2f16_0(<vscale
> x 2 x half> %v, half %el
>  define <vscale x 2 x half> @insertelt_nxv2f16_imm(<vscale x 2 x half> %v,
> half %elt) {
>  ; CHECK-LABEL: insertelt_nxv2f16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v9, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x half> %v, half %elt, i32 3
> @@ -85,8 +87,9 @@ define <vscale x 4 x half> @insertelt_nxv4f16_0(<vscale
> x 4 x half> %v, half %el
>  define <vscale x 4 x half> @insertelt_nxv4f16_imm(<vscale x 4 x half> %v,
> half %elt) {
>  ; CHECK-LABEL: insertelt_nxv4f16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v9, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 4 x half> %v, half %elt, i32 3
> @@ -119,8 +122,9 @@ define <vscale x 8 x half>
> @insertelt_nxv8f16_0(<vscale x 8 x half> %v, half %el
>  define <vscale x 8 x half> @insertelt_nxv8f16_imm(<vscale x 8 x half> %v,
> half %elt) {
>  ; CHECK-LABEL: insertelt_nxv8f16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v10, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 8 x half> %v, half %elt, i32 3
> @@ -153,8 +157,9 @@ define <vscale x 16 x half>
> @insertelt_nxv16f16_0(<vscale x 16 x half> %v, half
>  define <vscale x 16 x half> @insertelt_nxv16f16_imm(<vscale x 16 x half>
> %v, half %elt) {
>  ; CHECK-LABEL: insertelt_nxv16f16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m4, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v12, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v12, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 16 x half> %v, half %elt, i32 3
> @@ -187,8 +192,9 @@ define <vscale x 32 x half>
> @insertelt_nxv32f16_0(<vscale x 32 x half> %v, half
>  define <vscale x 32 x half> @insertelt_nxv32f16_imm(<vscale x 32 x half>
> %v, half %elt) {
>  ; CHECK-LABEL: insertelt_nxv32f16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m8, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v16, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v16, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 32 x half> %v, half %elt, i32 3
> @@ -221,8 +227,9 @@ define <vscale x 1 x float>
> @insertelt_nxv1f32_0(<vscale x 1 x float> %v, float
>  define <vscale x 1 x float> @insertelt_nxv1f32_imm(<vscale x 1 x float>
> %v, float %elt) {
>  ; CHECK-LABEL: insertelt_nxv1f32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v9, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 1 x float> %v, float %elt, i32 3
> @@ -255,8 +262,9 @@ define <vscale x 2 x float>
> @insertelt_nxv2f32_0(<vscale x 2 x float> %v, float
>  define <vscale x 2 x float> @insertelt_nxv2f32_imm(<vscale x 2 x float>
> %v, float %elt) {
>  ; CHECK-LABEL: insertelt_nxv2f32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v9, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x float> %v, float %elt, i32 3
> @@ -289,8 +297,9 @@ define <vscale x 4 x float>
> @insertelt_nxv4f32_0(<vscale x 4 x float> %v, float
>  define <vscale x 4 x float> @insertelt_nxv4f32_imm(<vscale x 4 x float>
> %v, float %elt) {
>  ; CHECK-LABEL: insertelt_nxv4f32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v10, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 4 x float> %v, float %elt, i32 3
> @@ -323,8 +332,9 @@ define <vscale x 8 x float>
> @insertelt_nxv8f32_0(<vscale x 8 x float> %v, float
>  define <vscale x 8 x float> @insertelt_nxv8f32_imm(<vscale x 8 x float>
> %v, float %elt) {
>  ; CHECK-LABEL: insertelt_nxv8f32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m4, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v12, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v12, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 8 x float> %v, float %elt, i32 3
> @@ -357,8 +367,9 @@ define <vscale x 16 x float>
> @insertelt_nxv16f32_0(<vscale x 16 x float> %v, flo
>  define <vscale x 16 x float> @insertelt_nxv16f32_imm(<vscale x 16 x
> float> %v, float %elt) {
>  ; CHECK-LABEL: insertelt_nxv16f32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m8, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v16, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v16, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 16 x float> %v, float %elt, i32 3
> @@ -391,8 +402,9 @@ define <vscale x 1 x double>
> @insertelt_nxv1f64_0(<vscale x 1 x double> %v, doub
>  define <vscale x 1 x double> @insertelt_nxv1f64_imm(<vscale x 1 x double>
> %v, double %elt) {
>  ; CHECK-LABEL: insertelt_nxv1f64_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e64, m1, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v9, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e64, m1, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 1 x double> %v, double %elt, i32 3
> @@ -425,8 +437,9 @@ define <vscale x 2 x double>
> @insertelt_nxv2f64_0(<vscale x 2 x double> %v, doub
>  define <vscale x 2 x double> @insertelt_nxv2f64_imm(<vscale x 2 x double>
> %v, double %elt) {
>  ; CHECK-LABEL: insertelt_nxv2f64_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v10, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x double> %v, double %elt, i32 3
> @@ -459,8 +472,9 @@ define <vscale x 4 x double>
> @insertelt_nxv4f64_0(<vscale x 4 x double> %v, doub
>  define <vscale x 4 x double> @insertelt_nxv4f64_imm(<vscale x 4 x double>
> %v, double %elt) {
>  ; CHECK-LABEL: insertelt_nxv4f64_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e64, m4, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v12, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e64, m4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v12, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 4 x double> %v, double %elt, i32 3
> @@ -493,8 +507,9 @@ define <vscale x 8 x double>
> @insertelt_nxv8f64_0(<vscale x 8 x double> %v, doub
>  define <vscale x 8 x double> @insertelt_nxv8f64_imm(<vscale x 8 x double>
> %v, double %elt) {
>  ; CHECK-LABEL: insertelt_nxv8f64_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e64, m8, tu, ma
> +; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
>  ; CHECK-NEXT:    vfmv.s.f v16, fa0
> +; CHECK-NEXT:    vsetivli zero, 4, e64, m8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v16, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 8 x double> %v, double %elt, i32 3
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
> b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
> index 12aebab9eb03..8d2e45eaee93 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
> @@ -15,8 +15,9 @@ define <vscale x 1 x i8> @insertelt_nxv1i8_0(<vscale x 1
> x i8> %v, i8 signext %e
>  define <vscale x 1 x i8> @insertelt_nxv1i8_imm(<vscale x 1 x i8> %v, i8
> signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv1i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 1 x i8> %v, i8 %elt, i32 3
> @@ -49,8 +50,9 @@ define <vscale x 2 x i8> @insertelt_nxv2i8_0(<vscale x 2
> x i8> %v, i8 signext %e
>  define <vscale x 2 x i8> @insertelt_nxv2i8_imm(<vscale x 2 x i8> %v, i8
> signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv2i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x i8> %v, i8 %elt, i32 3
> @@ -83,8 +85,9 @@ define <vscale x 4 x i8> @insertelt_nxv4i8_0(<vscale x 4
> x i8> %v, i8 signext %e
>  define <vscale x 4 x i8> @insertelt_nxv4i8_imm(<vscale x 4 x i8> %v, i8
> signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv4i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 4 x i8> %v, i8 %elt, i32 3
> @@ -117,8 +120,9 @@ define <vscale x 8 x i8> @insertelt_nxv8i8_0(<vscale x
> 8 x i8> %v, i8 signext %e
>  define <vscale x 8 x i8> @insertelt_nxv8i8_imm(<vscale x 8 x i8> %v, i8
> signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv8i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, m1, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, m1, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 8 x i8> %v, i8 %elt, i32 3
> @@ -151,8 +155,9 @@ define <vscale x 16 x i8> @insertelt_nxv16i8_0(<vscale
> x 16 x i8> %v, i8 signext
>  define <vscale x 16 x i8> @insertelt_nxv16i8_imm(<vscale x 16 x i8> %v,
> i8 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv16i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v10, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 16 x i8> %v, i8 %elt, i32 3
> @@ -185,8 +190,9 @@ define <vscale x 32 x i8> @insertelt_nxv32i8_0(<vscale
> x 32 x i8> %v, i8 signext
>  define <vscale x 32 x i8> @insertelt_nxv32i8_imm(<vscale x 32 x i8> %v,
> i8 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv32i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, m4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v12, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, m4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v12, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 32 x i8> %v, i8 %elt, i32 3
> @@ -219,8 +225,9 @@ define <vscale x 64 x i8> @insertelt_nxv64i8_0(<vscale
> x 64 x i8> %v, i8 signext
>  define <vscale x 64 x i8> @insertelt_nxv64i8_imm(<vscale x 64 x i8> %v,
> i8 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv64i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, m8, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v16, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, m8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v16, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 64 x i8> %v, i8 %elt, i32 3
> @@ -253,8 +260,9 @@ define <vscale x 1 x i16> @insertelt_nxv1i16_0(<vscale
> x 1 x i16> %v, i16 signex
>  define <vscale x 1 x i16> @insertelt_nxv1i16_imm(<vscale x 1 x i16> %v,
> i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv1i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 1 x i16> %v, i16 %elt, i32 3
> @@ -287,8 +295,9 @@ define <vscale x 2 x i16> @insertelt_nxv2i16_0(<vscale
> x 2 x i16> %v, i16 signex
>  define <vscale x 2 x i16> @insertelt_nxv2i16_imm(<vscale x 2 x i16> %v,
> i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv2i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x i16> %v, i16 %elt, i32 3
> @@ -321,8 +330,9 @@ define <vscale x 4 x i16> @insertelt_nxv4i16_0(<vscale
> x 4 x i16> %v, i16 signex
>  define <vscale x 4 x i16> @insertelt_nxv4i16_imm(<vscale x 4 x i16> %v,
> i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv4i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 4 x i16> %v, i16 %elt, i32 3
> @@ -355,8 +365,9 @@ define <vscale x 8 x i16> @insertelt_nxv8i16_0(<vscale
> x 8 x i16> %v, i16 signex
>  define <vscale x 8 x i16> @insertelt_nxv8i16_imm(<vscale x 8 x i16> %v,
> i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv8i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v10, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 8 x i16> %v, i16 %elt, i32 3
> @@ -389,8 +400,9 @@ define <vscale x 16 x i16>
> @insertelt_nxv16i16_0(<vscale x 16 x i16> %v, i16 sig
>  define <vscale x 16 x i16> @insertelt_nxv16i16_imm(<vscale x 16 x i16>
> %v, i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv16i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v12, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v12, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 16 x i16> %v, i16 %elt, i32 3
> @@ -423,8 +435,9 @@ define <vscale x 32 x i16>
> @insertelt_nxv32i16_0(<vscale x 32 x i16> %v, i16 sig
>  define <vscale x 32 x i16> @insertelt_nxv32i16_imm(<vscale x 32 x i16>
> %v, i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv32i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m8, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v16, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v16, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 32 x i16> %v, i16 %elt, i32 3
> @@ -457,8 +470,9 @@ define <vscale x 1 x i32> @insertelt_nxv1i32_0(<vscale
> x 1 x i32> %v, i32 %elt)
>  define <vscale x 1 x i32> @insertelt_nxv1i32_imm(<vscale x 1 x i32> %v,
> i32 %elt) {
>  ; CHECK-LABEL: insertelt_nxv1i32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 1 x i32> %v, i32 %elt, i32 3
> @@ -491,8 +505,9 @@ define <vscale x 2 x i32> @insertelt_nxv2i32_0(<vscale
> x 2 x i32> %v, i32 %elt)
>  define <vscale x 2 x i32> @insertelt_nxv2i32_imm(<vscale x 2 x i32> %v,
> i32 %elt) {
>  ; CHECK-LABEL: insertelt_nxv2i32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x i32> %v, i32 %elt, i32 3
> @@ -525,8 +540,9 @@ define <vscale x 4 x i32> @insertelt_nxv4i32_0(<vscale
> x 4 x i32> %v, i32 %elt)
>  define <vscale x 4 x i32> @insertelt_nxv4i32_imm(<vscale x 4 x i32> %v,
> i32 %elt) {
>  ; CHECK-LABEL: insertelt_nxv4i32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v10, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 4 x i32> %v, i32 %elt, i32 3
> @@ -559,8 +575,9 @@ define <vscale x 8 x i32> @insertelt_nxv8i32_0(<vscale
> x 8 x i32> %v, i32 %elt)
>  define <vscale x 8 x i32> @insertelt_nxv8i32_imm(<vscale x 8 x i32> %v,
> i32 %elt) {
>  ; CHECK-LABEL: insertelt_nxv8i32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v12, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v12, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 8 x i32> %v, i32 %elt, i32 3
> @@ -593,8 +610,9 @@ define <vscale x 16 x i32>
> @insertelt_nxv16i32_0(<vscale x 16 x i32> %v, i32 %el
>  define <vscale x 16 x i32> @insertelt_nxv16i32_imm(<vscale x 16 x i32>
> %v, i32 %elt) {
>  ; CHECK-LABEL: insertelt_nxv16i32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m8, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v16, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v16, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 16 x i32> %v, i32 %elt, i32 3
> @@ -782,8 +800,9 @@ define <vscale x 2 x i64>
> @insertelt_nxv2i64_imm_c10(<vscale x 2 x i64> %v) {
>  ; CHECK-LABEL: insertelt_nxv2i64_imm_c10:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, 10
> -; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v10, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x i64> %v, i64 10, i32 3
> @@ -819,8 +838,9 @@ define <vscale x 2 x i64>
> @insertelt_nxv2i64_imm_cn1(<vscale x 2 x i64> %v) {
>  ; CHECK-LABEL: insertelt_nxv2i64_imm_cn1:
>  ; CHECK:       # %bb.0:
>  ; CHECK-NEXT:    li a0, -1
> -; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v10, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x i64> %v, i64 -1, i32 3
>
> diff  --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
> b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
> index e7b6992feafa..e493f8ca6ade 100644
> --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
> +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
> @@ -15,8 +15,9 @@ define <vscale x 1 x i8> @insertelt_nxv1i8_0(<vscale x 1
> x i8> %v, i8 signext %e
>  define <vscale x 1 x i8> @insertelt_nxv1i8_imm(<vscale x 1 x i8> %v, i8
> signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv1i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 1 x i8> %v, i8 %elt, i32 3
> @@ -49,8 +50,9 @@ define <vscale x 2 x i8> @insertelt_nxv2i8_0(<vscale x 2
> x i8> %v, i8 signext %e
>  define <vscale x 2 x i8> @insertelt_nxv2i8_imm(<vscale x 2 x i8> %v, i8
> signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv2i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x i8> %v, i8 %elt, i32 3
> @@ -83,8 +85,9 @@ define <vscale x 4 x i8> @insertelt_nxv4i8_0(<vscale x 4
> x i8> %v, i8 signext %e
>  define <vscale x 4 x i8> @insertelt_nxv4i8_imm(<vscale x 4 x i8> %v, i8
> signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv4i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 4 x i8> %v, i8 %elt, i32 3
> @@ -117,8 +120,9 @@ define <vscale x 8 x i8> @insertelt_nxv8i8_0(<vscale x
> 8 x i8> %v, i8 signext %e
>  define <vscale x 8 x i8> @insertelt_nxv8i8_imm(<vscale x 8 x i8> %v, i8
> signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv8i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, m1, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, m1, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 8 x i8> %v, i8 %elt, i32 3
> @@ -151,8 +155,9 @@ define <vscale x 16 x i8> @insertelt_nxv16i8_0(<vscale
> x 16 x i8> %v, i8 signext
>  define <vscale x 16 x i8> @insertelt_nxv16i8_imm(<vscale x 16 x i8> %v,
> i8 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv16i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v10, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 16 x i8> %v, i8 %elt, i32 3
> @@ -185,8 +190,9 @@ define <vscale x 32 x i8> @insertelt_nxv32i8_0(<vscale
> x 32 x i8> %v, i8 signext
>  define <vscale x 32 x i8> @insertelt_nxv32i8_imm(<vscale x 32 x i8> %v,
> i8 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv32i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, m4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v12, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, m4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v12, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 32 x i8> %v, i8 %elt, i32 3
> @@ -219,8 +225,9 @@ define <vscale x 64 x i8> @insertelt_nxv64i8_0(<vscale
> x 64 x i8> %v, i8 signext
>  define <vscale x 64 x i8> @insertelt_nxv64i8_imm(<vscale x 64 x i8> %v,
> i8 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv64i8_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e8, m8, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v16, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e8, m8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v16, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 64 x i8> %v, i8 %elt, i32 3
> @@ -253,8 +260,9 @@ define <vscale x 1 x i16> @insertelt_nxv1i16_0(<vscale
> x 1 x i16> %v, i16 signex
>  define <vscale x 1 x i16> @insertelt_nxv1i16_imm(<vscale x 1 x i16> %v,
> i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv1i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 1 x i16> %v, i16 %elt, i32 3
> @@ -287,8 +295,9 @@ define <vscale x 2 x i16> @insertelt_nxv2i16_0(<vscale
> x 2 x i16> %v, i16 signex
>  define <vscale x 2 x i16> @insertelt_nxv2i16_imm(<vscale x 2 x i16> %v,
> i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv2i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x i16> %v, i16 %elt, i32 3
> @@ -321,8 +330,9 @@ define <vscale x 4 x i16> @insertelt_nxv4i16_0(<vscale
> x 4 x i16> %v, i16 signex
>  define <vscale x 4 x i16> @insertelt_nxv4i16_imm(<vscale x 4 x i16> %v,
> i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv4i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 4 x i16> %v, i16 %elt, i32 3
> @@ -355,8 +365,9 @@ define <vscale x 8 x i16> @insertelt_nxv8i16_0(<vscale
> x 8 x i16> %v, i16 signex
>  define <vscale x 8 x i16> @insertelt_nxv8i16_imm(<vscale x 8 x i16> %v,
> i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv8i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v10, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 8 x i16> %v, i16 %elt, i32 3
> @@ -389,8 +400,9 @@ define <vscale x 16 x i16>
> @insertelt_nxv16i16_0(<vscale x 16 x i16> %v, i16 sig
>  define <vscale x 16 x i16> @insertelt_nxv16i16_imm(<vscale x 16 x i16>
> %v, i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv16i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v12, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v12, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 16 x i16> %v, i16 %elt, i32 3
> @@ -423,8 +435,9 @@ define <vscale x 32 x i16>
> @insertelt_nxv32i16_0(<vscale x 32 x i16> %v, i16 sig
>  define <vscale x 32 x i16> @insertelt_nxv32i16_imm(<vscale x 32 x i16>
> %v, i16 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv32i16_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e16, m8, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v16, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e16, m8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v16, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 32 x i16> %v, i16 %elt, i32 3
> @@ -457,8 +470,9 @@ define <vscale x 1 x i32> @insertelt_nxv1i32_0(<vscale
> x 1 x i32> %v, i32 signex
>  define <vscale x 1 x i32> @insertelt_nxv1i32_imm(<vscale x 1 x i32> %v,
> i32 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv1i32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 1 x i32> %v, i32 %elt, i32 3
> @@ -491,8 +505,9 @@ define <vscale x 2 x i32> @insertelt_nxv2i32_0(<vscale
> x 2 x i32> %v, i32 signex
>  define <vscale x 2 x i32> @insertelt_nxv2i32_imm(<vscale x 2 x i32> %v,
> i32 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv2i32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x i32> %v, i32 %elt, i32 3
> @@ -525,8 +540,9 @@ define <vscale x 4 x i32> @insertelt_nxv4i32_0(<vscale
> x 4 x i32> %v, i32 signex
>  define <vscale x 4 x i32> @insertelt_nxv4i32_imm(<vscale x 4 x i32> %v,
> i32 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv4i32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v10, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 4 x i32> %v, i32 %elt, i32 3
> @@ -559,8 +575,9 @@ define <vscale x 8 x i32> @insertelt_nxv8i32_0(<vscale
> x 8 x i32> %v, i32 signex
>  define <vscale x 8 x i32> @insertelt_nxv8i32_imm(<vscale x 8 x i32> %v,
> i32 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv8i32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v12, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v12, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 8 x i32> %v, i32 %elt, i32 3
> @@ -593,8 +610,9 @@ define <vscale x 16 x i32>
> @insertelt_nxv16i32_0(<vscale x 16 x i32> %v, i32 sig
>  define <vscale x 16 x i32> @insertelt_nxv16i32_imm(<vscale x 16 x i32>
> %v, i32 signext %elt) {
>  ; CHECK-LABEL: insertelt_nxv16i32_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e32, m8, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v16, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e32, m8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v16, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 16 x i32> %v, i32 %elt, i32 3
> @@ -627,8 +645,9 @@ define <vscale x 1 x i64> @insertelt_nxv1i64_0(<vscale
> x 1 x i64> %v, i64 %elt)
>  define <vscale x 1 x i64> @insertelt_nxv1i64_imm(<vscale x 1 x i64> %v,
> i64 %elt) {
>  ; CHECK-LABEL: insertelt_nxv1i64_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e64, m1, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v9, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e64, m1, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v9, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 1 x i64> %v, i64 %elt, i32 3
> @@ -663,8 +682,9 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_0(<vscale
> x 2 x i64> %v, i64 %elt)
>  define <vscale x 2 x i64> @insertelt_nxv2i64_imm(<vscale x 2 x i64> %v,
> i64 %elt) {
>  ; CHECK-LABEL: insertelt_nxv2i64_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v10, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v10, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 2 x i64> %v, i64 %elt, i32 3
> @@ -699,8 +719,9 @@ define <vscale x 4 x i64> @insertelt_nxv4i64_0(<vscale
> x 4 x i64> %v, i64 %elt)
>  define <vscale x 4 x i64> @insertelt_nxv4i64_imm(<vscale x 4 x i64> %v,
> i64 %elt) {
>  ; CHECK-LABEL: insertelt_nxv4i64_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e64, m4, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v12, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e64, m4, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v12, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 3
> @@ -735,8 +756,9 @@ define <vscale x 8 x i64> @insertelt_nxv8i64_0(<vscale
> x 8 x i64> %v, i64 %elt)
>  define <vscale x 8 x i64> @insertelt_nxv8i64_imm(<vscale x 8 x i64> %v,
> i64 %elt) {
>  ; CHECK-LABEL: insertelt_nxv8i64_imm:
>  ; CHECK:       # %bb.0:
> -; CHECK-NEXT:    vsetivli zero, 4, e64, m8, tu, ma
> +; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
>  ; CHECK-NEXT:    vmv.s.x v16, a0
> +; CHECK-NEXT:    vsetivli zero, 4, e64, m8, tu, ma
>  ; CHECK-NEXT:    vslideup.vi v8, v16, 3
>  ; CHECK-NEXT:    ret
>    %r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 3
>
>
>