[llvm] b486246 - [RISCV] Use a smaller VL when interleaving fixed vectors

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 21 01:46:28 PST 2023


Author: Luke Lau
Date: 2023-02-21T09:46:23Z
New Revision: b486246135e94e5491e0a7c6f3ec718d32104392

URL: https://github.com/llvm/llvm-project/commit/b486246135e94e5491e0a7c6f3ec718d32104392
DIFF: https://github.com/llvm/llvm-project/commit/b486246135e94e5491e0a7c6f3ec718d32104392.diff

LOG: [RISCV] Use a smaller VL when interleaving fixed vectors

Interleaves generated with vwaddu.vv and vwmaccu.vx were using a VL that
was twice the number of elements actually needed: since the widening ops
produce double-width elements, the source element count already covers
the whole interleaved result.
This patch also pulls the interleaving logic out into its own function so
it can be reused by later patches, and adapts it to handle scalable
vectors as well as fixed ones.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D144386
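
For context on the trick being refactored here: vwaddu.vv zero-extends the two
narrow sources and adds them, and vwmaccu.vx with an all-ones scalar then adds
OddV * (2^SEW - 1) on top, leaving OddV in the high half and EvenV in the low
half of each double-width element. A minimal standalone sketch in plain C++
(the helper name is made up for illustration and is not part of this patch)
of one e16 lane pair:

// Scalar model of one e16 lane pair of the interleave.
#include <cassert>
#include <cstdint>

static uint32_t interleavePair(uint16_t Even, uint16_t Odd) {
  uint32_t Wide = uint32_t(Even) + uint32_t(Odd); // vwaddu.vv
  Wide += uint32_t(Odd) * 0xFFFFu;                // vwmaccu.vx with x = -1
  return Wide;                                    // == Even | (Odd << 16)
}

int main() {
  assert(interleavePair(0x1111, 0x2222) == 0x22221111u);
  return 0;
}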

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
    llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cbe180c597cb6..960e3731fa753 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3227,6 +3227,78 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
       DAG.getConstant(0, DL, XLenVT));
 }
 
+// Given two input vectors of <[vscale x] n x ty>, use vwaddu.vv and vwmaccu.vx
+// to create an interleaved vector of <[vscale x] n*2 x ty>.
+// This requires that the size of ty is less than the subtarget's maximum ELEN.
+static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, SDLoc &DL,
+                                     SelectionDAG &DAG,
+                                     const RISCVSubtarget &Subtarget) {
+  MVT VecVT = EvenV.getSimpleValueType();
+  MVT VecContainerVT = VecVT; // <vscale x n x ty>
+  // Convert fixed vectors to scalable if needed
+  if (VecContainerVT.isFixedLengthVector()) {
+    VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
+    EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
+    OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
+  }
+
+  assert(VecVT.getScalarSizeInBits() < Subtarget.getELEN());
+
+  // We're working with a vector of the same size as the resulting
+  // interleaved vector, but with half the number of elements and
+  // twice the SEW (hence the restriction on not using the maximum
+  // ELEN).
+  MVT WideVT =
+      MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
+                       VecVT.getVectorElementCount());
+  MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
+  if (WideContainerVT.isFixedLengthVector())
+    WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
+
+  // Bitcast the input vectors to integers in case they are FP
+  VecContainerVT = VecContainerVT.changeTypeToInteger();
+  EvenV = DAG.getBitcast(VecContainerVT, EvenV);
+  OddV = DAG.getBitcast(VecContainerVT, OddV);
+
+  auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
+  SDValue Passthru = DAG.getUNDEF(WideContainerVT);
+
+  // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
+  // vwaddu.vv
+  SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
+                                    EvenV, OddV, Passthru, Mask, VL);
+
+  // Then multiply OddV by 2^(VecVT.getScalarSizeInBits()) - 1, i.e. all-ones
+  SDValue AllOnesVec = DAG.getSplatVector(
+      VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
+  SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,
+                                AllOnesVec, Passthru, Mask, VL);
+
+  // Add the two together so we get
+  //   (OddV * 0xff...ff) + (OddV + EvenV)
+  // = (OddV * 0x100...00) + EvenV
+  // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
+  // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
+  Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, Interleaved,
+                            OddsMul, Passthru, Mask, VL);
+
+  // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
+  MVT ResultContainerVT = MVT::getVectorVT(
+      VecVT.getVectorElementType(), // Make sure to use original type
+      VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
+  Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
+
+  // Convert back to a fixed vector if needed
+  MVT ResultVT =
+      MVT::getVectorVT(VecVT.getVectorElementType(),
+                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
+  if (ResultVT.isFixedLengthVector())
+    Interleaved =
+        convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
+
+  return Interleaved;
+}
+
 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
   SDValue V1 = Op.getOperand(0);
@@ -3385,62 +3457,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
                        DAG.getConstant(OddSrc % Size, DL, XLenVT));
 
-    // Double the element width and halve the number of elements in an int type.
-    unsigned EltBits = VT.getScalarSizeInBits();
-    MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
-    MVT WideIntVT =
-        MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
-    // Convert this to a scalable vector. We need to base this on the
-    // destination size to ensure there's always a type with a smaller LMUL.
-    MVT WideIntContainerVT =
-        getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
-
-    // Convert sources to scalable vectors with the same element count as the
-    // larger type.
-    MVT HalfContainerVT = MVT::getVectorVT(
-        VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
-    EvenV = convertToScalableVector(HalfContainerVT, EvenV, DAG, Subtarget);
-    OddV = convertToScalableVector(HalfContainerVT, OddV, DAG, Subtarget);
-
-    // Cast sources to integer.
-    MVT IntEltVT = MVT::getIntegerVT(EltBits);
-    MVT IntHalfVT =
-        MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
-    EvenV = DAG.getBitcast(IntHalfVT, EvenV);
-    OddV = DAG.getBitcast(IntHalfVT, OddV);
-
-    // Freeze OddV since we use it twice and we need to be sure that the add and
-    // multiply see the same value.
-    OddV = DAG.getFreeze(OddV);
-
-    // Recreate TrueMask using the widened type's element count.
-    TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG);
-
-    // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV.
-    SDValue Add =
-        DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, EvenV, OddV,
-                    DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
-    // Create 2^eltbits - 1 copies of OddV by multiplying by the largest
-    // integer.
-    SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
-                                     DAG.getUNDEF(IntHalfVT),
-                                     DAG.getAllOnesConstant(DL, XLenVT), VL);
-    SDValue WidenMul =
-        DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT, OddV,
-                    Multiplier, DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
-    // Add the new copies to our previous addition giving us 2^eltbits copies of
-    // OddV. This is equivalent to shifting OddV left by eltbits. This should
-    // combine with the vwmulu.vv above to form vwmaccu.vv.
-    Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
-                      DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
-    // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
-    // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
-    // vector VT.
-    ContainerVT =
-        MVT::getVectorVT(VT.getVectorElementType(),
-                         WideIntContainerVT.getVectorElementCount() * 2);
-    Add = DAG.getBitcast(ContainerVT, Add);
-    return convertFromScalableVector(VT, Add, DAG, Subtarget);
+    return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
   }
 
   // Detect shuffles which can be re-expressed as vector selects; these are
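
The test updates below show the user-visible effect of the VL change: every
vsetivli/vsetvli feeding a vwaddu.vv/vwmaccu.vx pair now requests the source
element count rather than the result element count. As a rough standalone C++
model (assumed names and fixed sizes, not LLVM code), here is the operation
for two <8 x i16> inputs; the loop runs for VL = 8 source elements, and each
iteration already fills two e16 lanes of the <16 x i16> result:

#include <array>
#include <cstdint>

static std::array<uint32_t, 8>
wideningInterleave(const std::array<uint16_t, 8> &Even,
                   const std::array<uint16_t, 8> &Odd) {
  std::array<uint32_t, 8> Wide{};
  for (unsigned I = 0; I < 8; ++I) {                // VL = 8, EEW = 32
    Wide[I] = uint32_t(Even[I]) + uint32_t(Odd[I]); // vwaddu.vv
    Wide[I] += uint32_t(Odd[I]) * 0xFFFFu;          // vwmaccu.vx with x = -1
  }
  // Reinterpreted as 16 e16 lanes, Wide reads
  // Even[0], Odd[0], Even[1], Odd[1], ..., Even[7], Odd[7].
  return Wide;
}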

diff --git a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
index 628e0710c38c3..2d4f2960d3519 100644
--- a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
@@ -7,7 +7,7 @@ target triple = "riscv64-unknown-unknown-elf"
 define dso_local <16 x i16> @interleave(<8 x i16> %v0, <8 x i16> %v1) {
 ; CHECK-LABEL: interleave:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v10, v8, v9
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v10, a0, v9

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index b9050aedf1add..e97a710ba4129 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -9,7 +9,7 @@
 define <4 x half> @interleave_v2f16(<2 x half> %x, <2 x half> %y) {
 ; CHECK-LABEL: interleave_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v10, v8, v9
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
@@ -23,7 +23,7 @@ define <4 x half> @interleave_v2f16(<2 x half> %x, <2 x half> %y) {
 define <4 x float> @interleave_v2f32(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: interleave_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v10, v9, v8
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v10, a0, v8
@@ -97,7 +97,7 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
 define <8 x half> @interleave_v4f16(<4 x half> %x, <4 x half> %y) {
 ; V128-LABEL: interleave_v4f16:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; V128-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v8, v9
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v9
@@ -106,7 +106,7 @@ define <8 x half> @interleave_v4f16(<4 x half> %x, <4 x half> %y) {
 ;
 ; V512-LABEL: interleave_v4f16:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 8, e16, mf4, ta, ma
+; V512-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -119,7 +119,7 @@ define <8 x half> @interleave_v4f16(<4 x half> %x, <4 x half> %y) {
 define <8 x float> @interleave_v4f32(<4 x float> %x, <4 x float> %y) {
 ; V128-LABEL: interleave_v4f32:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v8, v9
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v9
@@ -128,7 +128,7 @@ define <8 x float> @interleave_v4f32(<4 x float> %x, <4 x float> %y) {
 ;
 ; V512-LABEL: interleave_v4f32:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 8, e32, mf2, ta, ma
+; V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -142,7 +142,7 @@ define <8 x float> @interleave_v4f32(<4 x float> %x, <4 x float> %y) {
 define <16 x half> @interleave_v8f16(<8 x half> %x, <8 x half> %y) {
 ; V128-LABEL: interleave_v8f16:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; V128-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v9, v8
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v8
@@ -151,7 +151,7 @@ define <16 x half> @interleave_v8f16(<8 x half> %x, <8 x half> %y) {
 ;
 ; V512-LABEL: interleave_v8f16:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 16, e16, mf4, ta, ma
+; V512-NEXT:    vsetivli zero, 8, e16, mf4, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v9, v8
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v8
@@ -164,7 +164,7 @@ define <16 x half> @interleave_v8f16(<8 x half> %x, <8 x half> %y) {
 define <16 x float> @interleave_v8f32(<8 x float> %x, <8 x float> %y) {
 ; V128-LABEL: interleave_v8f32:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 16, e32, m2, ta, ma
+; V128-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; V128-NEXT:    vwaddu.vv v12, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v12, a0, v10
@@ -173,7 +173,7 @@ define <16 x float> @interleave_v8f32(<8 x float> %x, <8 x float> %y) {
 ;
 ; V512-LABEL: interleave_v8f32:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 16, e32, mf2, ta, ma
+; V512-NEXT:    vsetivli zero, 8, e32, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -186,8 +186,7 @@ define <16 x float> @interleave_v8f32(<8 x float> %x, <8 x float> %y) {
 define <32 x half> @interleave_v16f16(<16 x half> %x, <16 x half> %y) {
 ; V128-LABEL: interleave_v16f16:
 ; V128:       # %bb.0:
-; V128-NEXT:    li a0, 32
-; V128-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; V128-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; V128-NEXT:    vwaddu.vv v12, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v12, a0, v10
@@ -196,8 +195,7 @@ define <32 x half> @interleave_v16f16(<16 x half> %x, <16 x half> %y) {
 ;
 ; V512-LABEL: interleave_v16f16:
 ; V512:       # %bb.0:
-; V512-NEXT:    li a0, 32
-; V512-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; V512-NEXT:    vsetivli zero, 16, e16, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -210,8 +208,7 @@ define <32 x half> @interleave_v16f16(<16 x half> %x, <16 x half> %y) {
 define <32 x float> @interleave_v16f32(<16 x float> %x, <16 x float> %y) {
 ; V128-LABEL: interleave_v16f32:
 ; V128:       # %bb.0:
-; V128-NEXT:    li a0, 32
-; V128-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; V128-NEXT:    vwaddu.vv v16, v8, v12
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v16, a0, v12
@@ -220,8 +217,7 @@ define <32 x float> @interleave_v16f32(<16 x float> %x, <16 x float> %y) {
 ;
 ; V512-LABEL: interleave_v16f32:
 ; V512:       # %bb.0:
-; V512-NEXT:    li a0, 32
-; V512-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; V512-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -234,7 +230,7 @@ define <32 x float> @interleave_v16f32(<16 x float> %x, <16 x float> %y) {
 define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) {
 ; V128-LABEL: interleave_v32f16:
 ; V128:       # %bb.0:
-; V128-NEXT:    li a0, 64
+; V128-NEXT:    li a0, 32
 ; V128-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; V128-NEXT:    vwaddu.vv v16, v8, v12
 ; V128-NEXT:    li a0, -1
@@ -244,7 +240,7 @@ define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) {
 ;
 ; V512-LABEL: interleave_v32f16:
 ; V512:       # %bb.0:
-; V512-NEXT:    li a0, 64
+; V512-NEXT:    li a0, 32
 ; V512-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
@@ -293,7 +289,7 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
 ; RV32-V128-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV32-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
 ; RV32-V128-NEXT:    vmv.v.v v24, v8
-; RV32-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-V128-NEXT:    addi a0, sp, 16
 ; RV32-V128-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-V128-NEXT:    vwaddu.vv v0, v8, v16
@@ -344,7 +340,7 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
 ; RV64-V128-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV64-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
 ; RV64-V128-NEXT:    vmv.v.v v24, v8
-; RV64-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-V128-NEXT:    addi a0, sp, 16
 ; RV64-V128-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-V128-NEXT:    vwaddu.vv v0, v8, v16
@@ -360,7 +356,7 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
 ;
 ; V512-LABEL: interleave_v32f32:
 ; V512:       # %bb.0:
-; V512-NEXT:    li a0, 64
+; V512-NEXT:    li a0, 32
 ; V512-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; V512-NEXT:    vwaddu.vv v12, v8, v10
 ; V512-NEXT:    li a0, -1
@@ -376,7 +372,7 @@ define <4 x half> @unary_interleave_v4f16(<4 x half> %x) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
 ; V128-NEXT:    vslidedown.vi v10, v8, 2
-; V128-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; V128-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; V128-NEXT:    vwaddu.vv v9, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v9, a0, v10
@@ -387,7 +383,6 @@ define <4 x half> @unary_interleave_v4f16(<4 x half> %x) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; V512-NEXT:    vslidedown.vi v10, v8, 2
-; V512-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; V512-NEXT:    vwaddu.vv v9, v8, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v9, a0, v10
@@ -402,7 +397,7 @@ define <4 x float> @unary_interleave_v4f32(<4 x float> %x) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
 ; V128-NEXT:    vslidedown.vi v10, v8, 2
-; V128-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; V128-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; V128-NEXT:    vwaddu.vv v9, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v9, a0, v10
@@ -413,7 +408,6 @@ define <4 x float> @unary_interleave_v4f32(<4 x float> %x) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; V512-NEXT:    vslidedown.vi v10, v8, 2
-; V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v9, v8, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v9, a0, v10
@@ -473,7 +467,7 @@ define <8 x half> @unary_interleave_v8f16(<8 x half> %x) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
 ; V128-NEXT:    vslidedown.vi v10, v8, 4
-; V128-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; V128-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; V128-NEXT:    vwaddu.vv v9, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v9, a0, v10
@@ -484,7 +478,6 @@ define <8 x half> @unary_interleave_v8f16(<8 x half> %x) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; V512-NEXT:    vslidedown.vi v10, v8, 4
-; V512-NEXT:    vsetivli zero, 8, e16, mf4, ta, ma
 ; V512-NEXT:    vwaddu.vv v9, v8, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v9, a0, v10
@@ -499,7 +492,7 @@ define <8 x float> @unary_interleave_v8f32(<8 x float> %x) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
 ; V128-NEXT:    vslidedown.vi v12, v8, 4
-; V128-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v12, v8
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v8
@@ -510,7 +503,6 @@ define <8 x float> @unary_interleave_v8f32(<8 x float> %x) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
 ; V512-NEXT:    vslidedown.vi v10, v8, 4
-; V512-NEXT:    vsetivli zero, 8, e32, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v9, v10, v8
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v9, a0, v8

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index c148faf2f3f20..2e97c5114483b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -9,7 +9,7 @@
 define <4 x i8> @interleave_v2i8(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: interleave_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v10, v8, v9
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
@@ -22,7 +22,7 @@ define <4 x i8> @interleave_v2i8(<2 x i8> %x, <2 x i8> %y) {
 define <4 x i16> @interleave_v2i16(<2 x i16> %x, <2 x i16> %y) {
 ; CHECK-LABEL: interleave_v2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v10, v8, v9
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
@@ -36,7 +36,7 @@ define <4 x i16> @interleave_v2i16(<2 x i16> %x, <2 x i16> %y) {
 define <4 x i32> @interleave_v2i32(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: interleave_v2i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v10, v9, v8
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v10, a0, v8
@@ -110,7 +110,7 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
 define <8 x i8> @interleave_v4i8(<4 x i8> %x, <4 x i8> %y) {
 ; V128-LABEL: interleave_v4i8:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
+; V128-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v9, v8
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v8
@@ -119,7 +119,7 @@ define <8 x i8> @interleave_v4i8(<4 x i8> %x, <4 x i8> %y) {
 ;
 ; V512-LABEL: interleave_v4i8:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 8, e8, mf8, ta, ma
+; V512-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v9, v8
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v8
@@ -133,7 +133,7 @@ define <8 x i8> @interleave_v4i8(<4 x i8> %x, <4 x i8> %y) {
 define <8 x i16> @interleave_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; V128-LABEL: interleave_v4i16:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; V128-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v8, v9
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v9
@@ -142,7 +142,7 @@ define <8 x i16> @interleave_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ;
 ; V512-LABEL: interleave_v4i16:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 8, e16, mf4, ta, ma
+; V512-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -155,7 +155,7 @@ define <8 x i16> @interleave_v4i16(<4 x i16> %x, <4 x i16> %y) {
 define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; V128-LABEL: interleave_v4i32:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v8, v9
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v9
@@ -164,7 +164,7 @@ define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ;
 ; V512-LABEL: interleave_v4i32:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 8, e32, mf2, ta, ma
+; V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -177,7 +177,7 @@ define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) {
 define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) {
 ; V128-LABEL: interleave_v8i8:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
+; V128-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v8, v9
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v9
@@ -186,7 +186,7 @@ define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) {
 ;
 ; V512-LABEL: interleave_v8i8:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 16, e8, mf8, ta, ma
+; V512-NEXT:    vsetivli zero, 8, e8, mf8, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -200,7 +200,7 @@ define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) {
 define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; V128-LABEL: interleave_v8i16:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; V128-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v9, v8
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v8
@@ -209,7 +209,7 @@ define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ;
 ; V512-LABEL: interleave_v8i16:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 16, e16, mf4, ta, ma
+; V512-NEXT:    vsetivli zero, 8, e16, mf4, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v9, v8
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v8
@@ -222,7 +222,7 @@ define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) {
 define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; V128-LABEL: interleave_v8i32:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 16, e32, m2, ta, ma
+; V128-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; V128-NEXT:    vwaddu.vv v12, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v12, a0, v10
@@ -231,7 +231,7 @@ define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ;
 ; V512-LABEL: interleave_v8i32:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 16, e32, mf2, ta, ma
+; V512-NEXT:    vsetivli zero, 8, e32, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -244,8 +244,7 @@ define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) {
 define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; V128-LABEL: interleave_v16i8:
 ; V128:       # %bb.0:
-; V128-NEXT:    li a0, 32
-; V128-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; V128-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v8, v9
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v9
@@ -254,8 +253,7 @@ define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ;
 ; V512-LABEL: interleave_v16i8:
 ; V512:       # %bb.0:
-; V512-NEXT:    li a0, 32
-; V512-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; V512-NEXT:    vsetivli zero, 16, e8, mf4, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -268,8 +266,7 @@ define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) {
 define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ; V128-LABEL: interleave_v16i16:
 ; V128:       # %bb.0:
-; V128-NEXT:    li a0, 32
-; V128-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; V128-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; V128-NEXT:    vwaddu.vv v12, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v12, a0, v10
@@ -278,8 +275,7 @@ define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ;
 ; V512-LABEL: interleave_v16i16:
 ; V512:       # %bb.0:
-; V512-NEXT:    li a0, 32
-; V512-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; V512-NEXT:    vsetivli zero, 16, e16, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -292,8 +288,7 @@ define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) {
 define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
 ; V128-LABEL: interleave_v16i32:
 ; V128:       # %bb.0:
-; V128-NEXT:    li a0, 32
-; V128-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; V128-NEXT:    vwaddu.vv v16, v8, v12
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v16, a0, v12
@@ -302,8 +297,7 @@ define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
 ;
 ; V512-LABEL: interleave_v16i32:
 ; V512:       # %bb.0:
-; V512-NEXT:    li a0, 32
-; V512-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; V512-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v10, a0, v9
@@ -316,7 +310,7 @@ define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
 define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
 ; V128-LABEL: interleave_v32i8:
 ; V128:       # %bb.0:
-; V128-NEXT:    li a0, 64
+; V128-NEXT:    li a0, 32
 ; V128-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 ; V128-NEXT:    vwaddu.vv v12, v8, v10
 ; V128-NEXT:    li a0, -1
@@ -326,7 +320,7 @@ define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
 ;
 ; V512-LABEL: interleave_v32i8:
 ; V512:       # %bb.0:
-; V512-NEXT:    li a0, 64
+; V512-NEXT:    li a0, 32
 ; V512-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
@@ -340,7 +334,7 @@ define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
 define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
 ; V128-LABEL: interleave_v32i16:
 ; V128:       # %bb.0:
-; V128-NEXT:    li a0, 64
+; V128-NEXT:    li a0, 32
 ; V128-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; V128-NEXT:    vwaddu.vv v16, v8, v12
 ; V128-NEXT:    li a0, -1
@@ -350,7 +344,7 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
 ;
 ; V512-LABEL: interleave_v32i16:
 ; V512:       # %bb.0:
-; V512-NEXT:    li a0, 64
+; V512-NEXT:    li a0, 32
 ; V512-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; V512-NEXT:    vwaddu.vv v10, v8, v9
 ; V512-NEXT:    li a0, -1
@@ -399,7 +393,7 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
 ; RV32-V128-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV32-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
 ; RV32-V128-NEXT:    vmv.v.v v24, v8
-; RV32-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-V128-NEXT:    addi a0, sp, 16
 ; RV32-V128-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-V128-NEXT:    vwaddu.vv v0, v8, v16
@@ -450,7 +444,7 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
 ; RV64-V128-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV64-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
 ; RV64-V128-NEXT:    vmv.v.v v24, v8
-; RV64-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-V128-NEXT:    addi a0, sp, 16
 ; RV64-V128-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-V128-NEXT:    vwaddu.vv v0, v8, v16
@@ -466,7 +460,7 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
 ;
 ; V512-LABEL: interleave_v32i32:
 ; V512:       # %bb.0:
-; V512-NEXT:    li a0, 64
+; V512-NEXT:    li a0, 32
 ; V512-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; V512-NEXT:    vwaddu.vv v12, v8, v10
 ; V512-NEXT:    li a0, -1
@@ -482,7 +476,7 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; V128-NEXT:    vslidedown.vi v10, v8, 2
-; V128-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
+; V128-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; V128-NEXT:    vwaddu.vv v9, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v9, a0, v10
@@ -493,7 +487,6 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; V512-NEXT:    vslidedown.vi v10, v8, 2
-; V512-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
 ; V512-NEXT:    vwaddu.vv v9, v8, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v9, a0, v10
@@ -508,7 +501,7 @@ define <4 x i16> @unary_interleave_v4i16(<4 x i16> %x) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
 ; V128-NEXT:    vslidedown.vi v10, v8, 2
-; V128-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; V128-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; V128-NEXT:    vwaddu.vv v9, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v9, a0, v10
@@ -519,7 +512,6 @@ define <4 x i16> @unary_interleave_v4i16(<4 x i16> %x) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; V512-NEXT:    vslidedown.vi v10, v8, 2
-; V512-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; V512-NEXT:    vwaddu.vv v9, v8, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v9, a0, v10
@@ -534,7 +526,7 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
 ; V128-NEXT:    vslidedown.vi v10, v8, 2
-; V128-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; V128-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; V128-NEXT:    vwaddu.vv v9, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v9, a0, v10
@@ -545,7 +537,6 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; V512-NEXT:    vslidedown.vi v10, v8, 2
-; V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v9, v8, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v9, a0, v10
@@ -605,7 +596,7 @@ define <8 x i8> @unary_interleave_v8i8(<8 x i8> %x) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; V128-NEXT:    vslidedown.vi v10, v8, 4
-; V128-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
+; V128-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; V128-NEXT:    vwaddu.vv v9, v8, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v9, a0, v10
@@ -616,7 +607,6 @@ define <8 x i8> @unary_interleave_v8i8(<8 x i8> %x) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
 ; V512-NEXT:    vslidedown.vi v10, v8, 4
-; V512-NEXT:    vsetivli zero, 8, e8, mf8, ta, ma
 ; V512-NEXT:    vwaddu.vv v9, v8, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v9, a0, v10
@@ -631,7 +621,7 @@ define <8 x i16> @unary_interleave_v8i16(<8 x i16> %x) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
 ; V128-NEXT:    vslidedown.vi v10, v8, 4
-; V128-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; V128-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; V128-NEXT:    vwaddu.vv v9, v10, v8
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v9, a0, v8
@@ -642,7 +632,6 @@ define <8 x i16> @unary_interleave_v8i16(<8 x i16> %x) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; V512-NEXT:    vslidedown.vi v10, v8, 4
-; V512-NEXT:    vsetivli zero, 8, e16, mf4, ta, ma
 ; V512-NEXT:    vwaddu.vv v9, v10, v8
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v9, a0, v8
@@ -657,7 +646,7 @@ define <8 x i32> @unary_interleave_v8i32(<8 x i32> %x) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
 ; V128-NEXT:    vslidedown.vi v12, v8, 4
-; V128-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; V128-NEXT:    vwaddu.vv v10, v8, v12
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v10, a0, v12
@@ -668,7 +657,6 @@ define <8 x i32> @unary_interleave_v8i32(<8 x i32> %x) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
 ; V512-NEXT:    vslidedown.vi v10, v8, 4
-; V512-NEXT:    vsetivli zero, 8, e32, mf2, ta, ma
 ; V512-NEXT:    vwaddu.vv v9, v8, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v9, a0, v10

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 3616128921602..9ce01467c83ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -327,7 +327,7 @@ define <4 x i8> @interleave_shuffles(<4 x i8> %x) {
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vrgather.vi v9, v8, 0
 ; CHECK-NEXT:    vrgather.vi v10, v8, 1
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v8, v9, v10
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v8, a0, v10

diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
index 5ac279523e8b5..65baa358afa17 100644
--- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
@@ -9,11 +9,10 @@ define void @interleave256(ptr %agg.result, ptr %0, ptr %1) {
 ; RV64-1024-NEXT:    vsetvli zero, a3, e16, m2, ta, ma
 ; RV64-1024-NEXT:    vle16.v v8, (a1)
 ; RV64-1024-NEXT:    vle16.v v10, (a2)
-; RV64-1024-NEXT:    li a1, 256
-; RV64-1024-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-1024-NEXT:    vwaddu.vv v12, v8, v10
-; RV64-1024-NEXT:    li a2, -1
-; RV64-1024-NEXT:    vwmaccu.vx v12, a2, v10
+; RV64-1024-NEXT:    li a1, -1
+; RV64-1024-NEXT:    vwmaccu.vx v12, a1, v10
+; RV64-1024-NEXT:    li a1, 256
 ; RV64-1024-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; RV64-1024-NEXT:    vse16.v v12, (a0)
 ; RV64-1024-NEXT:    ret
@@ -24,11 +23,10 @@ define void @interleave256(ptr %agg.result, ptr %0, ptr %1) {
 ; RV64-2048-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
 ; RV64-2048-NEXT:    vle16.v v8, (a1)
 ; RV64-2048-NEXT:    vle16.v v9, (a2)
-; RV64-2048-NEXT:    li a1, 256
-; RV64-2048-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
 ; RV64-2048-NEXT:    vwaddu.vv v10, v8, v9
-; RV64-2048-NEXT:    li a2, -1
-; RV64-2048-NEXT:    vwmaccu.vx v10, a2, v9
+; RV64-2048-NEXT:    li a1, -1
+; RV64-2048-NEXT:    vwmaccu.vx v10, a1, v9
+; RV64-2048-NEXT:    li a1, 256
 ; RV64-2048-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-2048-NEXT:    vse16.v v10, (a0)
 ; RV64-2048-NEXT:    ret
@@ -49,11 +47,10 @@ define void @interleave512(ptr %agg.result, ptr %0, ptr %1) local_unnamed_addr {
 ; RV64-1024-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; RV64-1024-NEXT:    vle16.v v8, (a1)
 ; RV64-1024-NEXT:    vle16.v v12, (a2)
-; RV64-1024-NEXT:    li a1, 512
-; RV64-1024-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; RV64-1024-NEXT:    vwaddu.vv v16, v8, v12
-; RV64-1024-NEXT:    li a2, -1
-; RV64-1024-NEXT:    vwmaccu.vx v16, a2, v12
+; RV64-1024-NEXT:    li a1, -1
+; RV64-1024-NEXT:    vwmaccu.vx v16, a1, v12
+; RV64-1024-NEXT:    li a1, 512
 ; RV64-1024-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; RV64-1024-NEXT:    vse16.v v16, (a0)
 ; RV64-1024-NEXT:    ret
@@ -64,11 +61,10 @@ define void @interleave512(ptr %agg.result, ptr %0, ptr %1) local_unnamed_addr {
 ; RV64-2048-NEXT:    vsetvli zero, a3, e16, m2, ta, ma
 ; RV64-2048-NEXT:    vle16.v v8, (a1)
 ; RV64-2048-NEXT:    vle16.v v10, (a2)
-; RV64-2048-NEXT:    li a1, 512
-; RV64-2048-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-2048-NEXT:    vwaddu.vv v12, v8, v10
-; RV64-2048-NEXT:    li a2, -1
-; RV64-2048-NEXT:    vwmaccu.vx v12, a2, v10
+; RV64-2048-NEXT:    li a1, -1
+; RV64-2048-NEXT:    vwmaccu.vx v12, a1, v10
+; RV64-2048-NEXT:    li a1, 512
 ; RV64-2048-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; RV64-2048-NEXT:    vse16.v v12, (a0)
 ; RV64-2048-NEXT:    ret


        

